diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
index 70a4b56e6..0596f22d3 100644
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@@ -447,3 +447,44 @@ jobs:
           DB_USERNAME: cognee
           DB_PASSWORD: cognee
         run: uv run python ./cognee/tests/test_conversation_history.py
+
+  test-load:
+    name: Test Load
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+          extra-dependencies: "aws"
+
+      - name: Set File Descriptor Limit
+        run: sudo prlimit --pid $$ --nofile=4096:4096
+
+      - name: Verify File Descriptor Limit
+        run: ulimit -n
+
+      - name: Dependencies already installed
+        run: echo "Dependencies already installed in setup"
+
+      - name: Run Load Test
+        env:
+          ENV: 'dev'
+          ENABLE_BACKEND_ACCESS_CONTROL: True
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+          STORAGE_BACKEND: s3
+          AWS_REGION: eu-west-1
+          AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }}
+        run: uv run python ./cognee/tests/test_load.py
\ No newline at end of file
diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py
new file mode 100644
--- /dev/null
+++ b/cognee/tests/test_load.py
@@ -0,0 +1,91 @@
+"""Load test: add data from S3, cognify it and run concurrent searches.
+
+Fails when the average or any single repetition exceeds its time budget.
+"""
+
+import os
+import pathlib
+import asyncio
+import time
+
+import cognee
+from cognee.modules.search.types import SearchType
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
+
+
+async def process_and_search(num_of_searches):
+    """Time one cognify run followed by concurrent searches.
+
+    Args:
+        num_of_searches: Number of identical GRAPH_COMPLETION searches that
+            are awaited together once cognify has finished.
+
+    Returns:
+        Elapsed seconds covering cognify plus all searches.
+    """
+    # perf_counter is monotonic, so a wall-clock adjustment during a
+    # multi-minute run cannot distort the measured duration.
+    start_time = time.perf_counter()
+
+    await cognee.cognify()
+
+    await asyncio.gather(
+        *[
+            cognee.search(
+                query_text="Tell me about the document", query_type=SearchType.GRAPH_COMPLETION
+            )
+            for _ in range(num_of_searches)
+        ]
+    )
+
+    return time.perf_counter() - start_time
+
+
+async def main():
+    """Repeat the load scenario and enforce the time budgets.
+
+    Raises:
+        AssertionError: If the average duration or the slowest single
+            repetition exceeds its limit.
+    """
+    data_directory_path = os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load")
+    cognee.config.data_root_directory(data_directory_path)
+
+    cognee_directory_path = os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load")
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    num_of_searches = 10
+    num_of_reps = 5
+    upper_boundary_seconds = 10 * 60
+    average_seconds = 8 * 60
+    s3_input = "s3://cognee-test-load-s3-bucket"
+
+    recorded_times = []
+    for rep in range(1, num_of_reps + 1):
+        # Prune before each repetition; presumably this resets stored data and
+        # metadata so every run starts from the same state (confirm).
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+
+        await cognee.add(s3_input)
+
+        elapsed = await process_and_search(num_of_searches)
+        logger.info(f"Load test repetition {rep}/{num_of_reps} took {elapsed:.1f}s")
+        recorded_times.append(elapsed)
+
+    average_recorded_time = sum(recorded_times) / len(recorded_times)
+
+    assert average_recorded_time <= average_seconds, (
+        f"Average time {average_recorded_time:.1f}s exceeds limit of {average_seconds}s"
+    )
+
+    slowest = max(recorded_times)
+    assert slowest <= upper_boundary_seconds, (
+        f"Slowest repetition took {slowest:.1f}s, limit is {upper_boundary_seconds}s"
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())