From ac5118ee34c4bd149ac26d042e2ffe5292ee3459 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 15 Oct 2025 17:28:51 +0200 Subject: [PATCH 1/8] test: Add load test --- cognee/tests/load_test.py | 61 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 cognee/tests/load_test.py diff --git a/cognee/tests/load_test.py b/cognee/tests/load_test.py new file mode 100644 index 000000000..da9b74ab9 --- /dev/null +++ b/cognee/tests/load_test.py @@ -0,0 +1,61 @@ +import os +import pathlib +import asyncio +import time + +import cognee +from cognee.modules.search.types import SearchType +from cognee.shared.logging_utils import get_logger + +logger = get_logger() + +async def helper_func(num_of_searches): + + start_time = time.time() + + await cognee.cognify() + + await asyncio.gather( + *[ + cognee.search(query_text="Tell me about AI", query_type=SearchType.GRAPH_COMPLETION) + for _ in range(num_of_searches) + ] + ) + + end_time = time.time() + + return end_time - start_time + +async def main(): + + file_path = os.path.join( + pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" + ) + + num_of_pdfs = 10 + num_of_reps = 5 + upper_boundary_minutes = 3 + average_minutes = 1.5 + + await asyncio.gather( + *[ + cognee.add(file_path, dataset_name=f"dataset_{i}") + for i in range(num_of_pdfs) + ] + ) + + recorded_times = await asyncio.gather( + *[helper_func(num_of_pdfs) for _ in range(num_of_reps)] + ) + + average_recorded_time = sum(recorded_times) / len(recorded_times) + + assert average_recorded_time <= average_minutes * 60 + + assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times) + + return + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file From c16459d236a6e07b9267d323387b2be217fd5b46 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 15 Oct 2025 17:58:05 +0200 Subject: [PATCH 2/8] test: Add prune step to the test --- 
cognee/tests/load_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cognee/tests/load_test.py b/cognee/tests/load_test.py index da9b74ab9..c44efad00 100644 --- a/cognee/tests/load_test.py +++ b/cognee/tests/load_test.py @@ -9,7 +9,7 @@ from cognee.shared.logging_utils import get_logger logger = get_logger() -async def helper_func(num_of_searches): +async def process_and_search(num_of_searches): start_time = time.time() @@ -37,6 +37,9 @@ async def main(): upper_boundary_minutes = 3 average_minutes = 1.5 + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await asyncio.gather( *[ cognee.add(file_path, dataset_name=f"dataset_{i}") @@ -45,7 +48,7 @@ async def main(): ) recorded_times = await asyncio.gather( - *[helper_func(num_of_pdfs) for _ in range(num_of_reps)] + *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] ) average_recorded_time = sum(recorded_times) / len(recorded_times) @@ -54,8 +57,6 @@ async def main(): assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times) - return - if __name__ == "__main__": asyncio.run(main()) \ No newline at end of file From c5648e63375d9eb1520f5a007dda520f70c9c145 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 22 Oct 2025 09:22:11 +0200 Subject: [PATCH 3/8] test: Add load test. 
--- .github/workflows/e2e_tests.yml | 31 ++++++++++++++++++++- cognee/tests/{load_test.py => test_load.py} | 30 ++++++++++++-------- 2 files changed, 49 insertions(+), 12 deletions(-) rename cognee/tests/{load_test.py => test_load.py} (65%) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 9582a3f3b..5f66e71d2 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -330,4 +330,33 @@ jobs: DB_PORT: 5432 DB_USERNAME: cognee DB_PASSWORD: cognee - run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py \ No newline at end of file + run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py + + test-load: + name: Test Load + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Load Test + env: + ENV: 'dev' + ENABLE_BACKEND_ACCESS_CONTROL: True + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./cognee/tests/test_load.py \ No newline at end of file diff --git a/cognee/tests/load_test.py b/cognee/tests/test_load.py similarity index 65% rename from cognee/tests/load_test.py rename to cognee/tests/test_load.py index c44efad00..09e2db084 100644 --- a/cognee/tests/load_test.py +++ b/cognee/tests/test_load.py @@ -9,8 +9,8 @@ from cognee.shared.logging_utils import get_logger logger = get_logger() -async def process_and_search(num_of_searches): +async def 
process_and_search(num_of_searches): start_time = time.time() await cognee.cognify() @@ -26,26 +26,34 @@ async def process_and_search(num_of_searches): return end_time - start_time -async def main(): +async def main(): file_path = os.path.join( pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" ) + data_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") + ).resolve() + ) + cognee.config.data_root_directory(data_directory_path) + cognee_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load") + ).resolve() + ) + cognee.config.system_root_directory(cognee_directory_path) num_of_pdfs = 10 num_of_reps = 5 - upper_boundary_minutes = 3 - average_minutes = 1.5 + upper_boundary_minutes = 10 + average_minutes = 8 await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - await asyncio.gather( - *[ - cognee.add(file_path, dataset_name=f"dataset_{i}") - for i in range(num_of_pdfs) - ] - ) + for i in range(num_of_pdfs): + await cognee.add(file_path, dataset_name=f"dataset_{i}") recorded_times = await asyncio.gather( *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] @@ -59,4 +67,4 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) From 897fbd2f09abfc1c3c5cc30fc2fcf17ed549ae80 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 27 Oct 2025 15:42:09 +0100 Subject: [PATCH 4/8] load test now uses s3 bucket --- cognee/tests/test_load.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index 09e2db084..f8d007d28 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -17,7 +17,9 @@ async def process_and_search(num_of_searches): await asyncio.gather( *[ - cognee.search(query_text="Tell me about AI", 
query_type=SearchType.GRAPH_COMPLETION) + cognee.search( + query_text="Tell me about the document", query_type=SearchType.GRAPH_COMPLETION + ) for _ in range(num_of_searches) ] ) @@ -28,9 +30,6 @@ async def process_and_search(num_of_searches): async def main(): - file_path = os.path.join( - pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" - ) data_directory_path = str( pathlib.Path( os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") @@ -52,8 +51,8 @@ async def main(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - for i in range(num_of_pdfs): - await cognee.add(file_path, dataset_name=f"dataset_{i}") + s3_input = "s3://cognee-load-test-s3-bucket" + await cognee.add(s3_input) recorded_times = await asyncio.gather( *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] From 2b083dd0f110e44341d30a6228abb18591cfabac Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 28 Oct 2025 09:27:33 +0100 Subject: [PATCH 5/8] small changes to load test --- cognee/tests/test_load.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index f8d007d28..a09ce053d 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -48,15 +48,15 @@ async def main(): upper_boundary_minutes = 10 average_minutes = 8 - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) + recorded_times = [] + for _ in range(num_of_reps): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) - s3_input = "s3://cognee-load-test-s3-bucket" - await cognee.add(s3_input) + s3_input = "s3://cognee-test-load-s3-bucket" + await cognee.add(s3_input) - recorded_times = await asyncio.gather( - *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] - ) + recorded_times.append(await process_and_search(num_of_pdfs)) average_recorded_time = sum(recorded_times) / 
len(recorded_times) From 4424bdc76471342119d59943c38d392dac9d72b0 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 3 Nov 2025 17:06:51 +0100 Subject: [PATCH 6/8] test: fix path based on pr comment --- cognee/tests/test_load.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index a09ce053d..b38466bc7 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -30,17 +30,10 @@ async def process_and_search(num_of_searches): async def main(): - data_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") - ).resolve() - ) + data_directory_path = os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") cognee.config.data_root_directory(data_directory_path) - cognee_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load") - ).resolve() - ) + + cognee_directory_path = os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load") cognee.config.system_root_directory(cognee_directory_path) num_of_pdfs = 10 From eb8df45dab2cb7a07de7eb97570d364790f80080 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 3 Nov 2025 18:10:19 +0100 Subject: [PATCH 7/8] test: increase file descriptor limit on workflow load test --- .github/workflows/e2e_tests.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index cf704c76a..79df3ff6b 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -463,6 +463,12 @@ jobs: with: python-version: '3.11.x' + - name: Set File Descriptor Limit + run: sudo prlimit --pid $$ --nofile=4096:4096 + + - name: Verify File Descriptor Limit + run: ulimit -n + - name: Dependencies already installed run: echo "Dependencies already installed in setup" @@ -478,4 +484,9 @@ jobs: EMBEDDING_ENDPOINT: ${{ 
secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + STORAGE_BACKEND: s3 + AWS_REGION: eu-west-1 + AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }} run: uv run python ./cognee/tests/test_load.py \ No newline at end of file From a7d63df98c7ad21a40679c1b821acb30690c65d8 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 3 Nov 2025 18:15:18 +0100 Subject: [PATCH 8/8] test: add extra aws dependency to load test --- .github/workflows/e2e_tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 79df3ff6b..0596f22d3 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -267,8 +267,6 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./cognee/tests/test_edge_ingestion.py - - run_concurrent_subprocess_access_test: name: Concurrent Subprocess access test runs-on: ubuntu-latest @@ -450,7 +448,6 @@ jobs: DB_PASSWORD: cognee run: uv run python ./cognee/tests/test_conversation_history.py - test-load: name: Test Load runs-on: ubuntu-22.04 @@ -462,6 +459,7 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' + extra-dependencies: "aws" - name: Set File Descriptor Limit run: sudo prlimit --pid $$ --nofile=4096:4096