From ac5118ee34c4bd149ac26d042e2ffe5292ee3459 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 15 Oct 2025 17:28:51 +0200 Subject: [PATCH 01/68] test:Add load test --- cognee/tests/load_test.py | 61 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 cognee/tests/load_test.py diff --git a/cognee/tests/load_test.py b/cognee/tests/load_test.py new file mode 100644 index 000000000..da9b74ab9 --- /dev/null +++ b/cognee/tests/load_test.py @@ -0,0 +1,61 @@ +import os +import pathlib +import asyncio +import time + +import cognee +from cognee.modules.search.types import SearchType +from cognee.shared.logging_utils import get_logger + +logger = get_logger() + +async def helper_func(num_of_searches): + + start_time = time.time() + + await cognee.cognify() + + await asyncio.gather( + *[ + cognee.search(query_text="Tell me about AI", query_type=SearchType.GRAPH_COMPLETION) + for _ in range(num_of_searches) + ] + ) + + end_time = time.time() + + return end_time - start_time + +async def main(): + + file_path = os.path.join( + pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" + ) + + num_of_pdfs = 10 + num_of_reps = 5 + upper_boundary_minutes = 3 + average_minutes = 1.5 + + await asyncio.gather( + *[ + cognee.add(file_path, dataset_name=f"dataset_{i}") + for i in range(num_of_pdfs) + ] + ) + + recorded_times = await asyncio.gather( + *[helper_func(num_of_pdfs) for _ in range(num_of_reps)] + ) + + average_recorded_time = sum(recorded_times) / len(recorded_times) + + assert average_recorded_time <= average_minutes * 60 + + assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times) + + return + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file From c16459d236a6e07b9267d323387b2be217fd5b46 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 15 Oct 2025 17:58:05 +0200 Subject: [PATCH 02/68] test: Add prune step to the test --- 
cognee/tests/load_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cognee/tests/load_test.py b/cognee/tests/load_test.py index da9b74ab9..c44efad00 100644 --- a/cognee/tests/load_test.py +++ b/cognee/tests/load_test.py @@ -9,7 +9,7 @@ from cognee.shared.logging_utils import get_logger logger = get_logger() -async def helper_func(num_of_searches): +async def process_and_search(num_of_searches): start_time = time.time() @@ -37,6 +37,9 @@ async def main(): upper_boundary_minutes = 3 average_minutes = 1.5 + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await asyncio.gather( *[ cognee.add(file_path, dataset_name=f"dataset_{i}") @@ -45,7 +48,7 @@ async def main(): ) recorded_times = await asyncio.gather( - *[helper_func(num_of_pdfs) for _ in range(num_of_reps)] + *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] ) average_recorded_time = sum(recorded_times) / len(recorded_times) @@ -54,8 +57,6 @@ async def main(): assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times) - return - if __name__ == "__main__": asyncio.run(main()) \ No newline at end of file From c5648e63375d9eb1520f5a007dda520f70c9c145 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 22 Oct 2025 09:22:11 +0200 Subject: [PATCH 03/68] test: Add load test. 
--- .github/workflows/e2e_tests.yml | 31 ++++++++++++++++++++- cognee/tests/{load_test.py => test_load.py} | 30 ++++++++++++-------- 2 files changed, 49 insertions(+), 12 deletions(-) rename cognee/tests/{load_test.py => test_load.py} (65%) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 9582a3f3b..5f66e71d2 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -330,4 +330,33 @@ jobs: DB_PORT: 5432 DB_USERNAME: cognee DB_PASSWORD: cognee - run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py \ No newline at end of file + run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py + + test-load: + name: Test Load + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Load Test + env: + ENV: 'dev' + ENABLE_BACKEND_ACCESS_CONTROL: True + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./cognee/tests/test_load.py \ No newline at end of file diff --git a/cognee/tests/load_test.py b/cognee/tests/test_load.py similarity index 65% rename from cognee/tests/load_test.py rename to cognee/tests/test_load.py index c44efad00..09e2db084 100644 --- a/cognee/tests/load_test.py +++ b/cognee/tests/test_load.py @@ -9,8 +9,8 @@ from cognee.shared.logging_utils import get_logger logger = get_logger() -async def process_and_search(num_of_searches): +async def 
process_and_search(num_of_searches): start_time = time.time() await cognee.cognify() @@ -26,26 +26,34 @@ async def process_and_search(num_of_searches): return end_time - start_time -async def main(): +async def main(): file_path = os.path.join( pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" ) + data_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") + ).resolve() + ) + cognee.config.data_root_directory(data_directory_path) + cognee_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load") + ).resolve() + ) + cognee.config.system_root_directory(cognee_directory_path) num_of_pdfs = 10 num_of_reps = 5 - upper_boundary_minutes = 3 - average_minutes = 1.5 + upper_boundary_minutes = 10 + average_minutes = 8 await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - await asyncio.gather( - *[ - cognee.add(file_path, dataset_name=f"dataset_{i}") - for i in range(num_of_pdfs) - ] - ) + for i in range(num_of_pdfs): + await cognee.add(file_path, dataset_name=f"dataset_{i}") recorded_times = await asyncio.gather( *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] @@ -59,4 +67,4 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) From ebb9a1b10278f12c9b7217e3f8dba94d798f1b9d Mon Sep 17 00:00:00 2001 From: chinu0609 Date: Wed, 22 Oct 2025 21:36:53 +0530 Subject: [PATCH 04/68] fix: change /api/embeddings to /api/embed in .env.template --- .env.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.template b/.env.template index 7fd3ba9e8..0bf06af57 100644 --- a/.env.template +++ b/.env.template @@ -249,7 +249,7 @@ LITELLM_LOG="ERROR" #LLM_ENDPOINT="http://localhost:11434/v1" #EMBEDDING_PROVIDER="ollama" #EMBEDDING_MODEL="avr/sfr-embedding-mistral:latest" 
-#EMBEDDING_ENDPOINT="http://localhost:11434/api/embeddings" +#EMBEDDING_ENDPOINT="http://localhost:11434/api/embed" #EMBEDDING_DIMENSIONS=4096 #HUGGINGFACE_TOKENIZER="Salesforce/SFR-Embedding-Mistral" From fa54469ea3fb2370d6ef1273a727eda814d88040 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 24 Oct 2025 14:28:30 +0100 Subject: [PATCH 05/68] fix: remove redundant SDKs to reduce space used on Github Runner --- .github/workflows/dockerhub-mcp.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/dockerhub-mcp.yml b/.github/workflows/dockerhub-mcp.yml index 3d4ef4fe1..7a23df936 100644 --- a/.github/workflows/dockerhub-mcp.yml +++ b/.github/workflows/dockerhub-mcp.yml @@ -10,11 +10,23 @@ jobs: runs-on: ubuntu-latest steps: + - name: Check and free disk space before build + run: | + echo "=== Before cleanup ===" + df -h + echo "Removing unused preinstalled SDKs to free space..." + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true + docker system prune -af || true + echo "=== After cleanup ===" + df -h + - name: Checkout repository uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + with: + buildkitd-flags: --root /tmp/buildkit - name: Log in to Docker Hub uses: docker/login-action@v3 From eb40945c6d7394ebb9e997b9bb19631411c2a3a1 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sat, 25 Oct 2025 10:26:46 +0200 Subject: [PATCH 06/68] added logs --- cognee/api/client.py | 5 +++++ docker-compose.yml | 2 +- entrypoint.sh | 6 +++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cognee/api/client.py b/cognee/api/client.py index 6766c12de..19a607ff0 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -39,6 +39,8 @@ from cognee.api.v1.users.routers import ( ) from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION +# Ensure application logging is configured for container stdout/stderr +setup_logging() logger = 
get_logger() if os.getenv("ENV", "prod") == "prod": @@ -74,6 +76,9 @@ async def lifespan(app: FastAPI): await get_default_user() + # Emit a clear startup message for docker logs + logger.info("Backend server has started") + yield diff --git a/docker-compose.yml b/docker-compose.yml index 43d9b2607..472f24c21 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: - DEBUG=false # Change to true if debugging - HOST=0.0.0.0 - ENVIRONMENT=local - - LOG_LEVEL=ERROR + - LOG_LEVEL=INFO extra_hosts: # Allows the container to reach your local machine using "host.docker.internal" instead of "localhost" - "host.docker.internal:host-gateway" diff --git a/entrypoint.sh b/entrypoint.sh index bad9b7aa3..496825408 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -43,10 +43,10 @@ sleep 2 if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then if [ "$DEBUG" = "true" ]; then echo "Waiting for the debugger to attach..." - debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload cognee.api.client:app + exec debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload --access-logfile - --error-logfile - cognee.api.client:app else - gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload cognee.api.client:app + exec gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload --access-logfile - --error-logfile - cognee.api.client:app fi else - gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level error cognee.api.client:app + exec gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level error --access-logfile - --error-logfile - cognee.api.client:app fi From 
67c700db3b6701451f879aafb97e32cef3b2c469 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sat, 25 Oct 2025 11:58:34 +0200 Subject: [PATCH 07/68] removed coding assistance --- cognee-mcp/pyproject.toml | 2 +- cognee-mcp/src/client.py | 4 +-- cognee-mcp/uv.lock | 64 ++------------------------------------- 3 files changed, 5 insertions(+), 65 deletions(-) diff --git a/cognee-mcp/pyproject.toml b/cognee-mcp/pyproject.toml index c3327b67f..f37bf337c 100644 --- a/cognee-mcp/pyproject.toml +++ b/cognee-mcp/pyproject.toml @@ -9,7 +9,7 @@ dependencies = [ # For local cognee repo usage remove comment bellow and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes. #"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/igorilic/Desktop/cognee", # TODO: Remove gemini from optional dependecnies for new Cognee version after 0.3.4 - "cognee[postgres,codegraph,huggingface,docs,neo4j]==0.3.7", + "cognee[postgres,docs,neo4j]==0.3.7", "fastmcp>=2.10.0,<3.0.0", "mcp>=1.12.0,<2.0.0", "uv>=0.6.3,<1.0.0", diff --git a/cognee-mcp/src/client.py b/cognee-mcp/src/client.py index 2d6bdfe18..407a3896b 100755 --- a/cognee-mcp/src/client.py +++ b/cognee-mcp/src/client.py @@ -38,11 +38,11 @@ async def run(): toolResult = await session.call_tool("prune", arguments={}) toolResult = await session.call_tool( - "codify", arguments={"repo_path": "SOME_REPO_PATH"} + "cognify", arguments={} ) toolResult = await session.call_tool( - "search", arguments={"search_type": "CODE", "search_query": "exceptions"} + "search", arguments={"search_type": "GRAPH_COMPLETION"} ) print(f"Cognify result: {toolResult.content}") diff --git a/cognee-mcp/uv.lock b/cognee-mcp/uv.lock index daa88edef..a6a6fb511 100644 --- a/cognee-mcp/uv.lock +++ b/cognee-mcp/uv.lock @@ -718,19 +718,10 @@ wheels = [ ] [package.optional-dependencies] -codegraph = [ - { name = "fastembed", marker = "python_full_version < '3.13'" }, - { name = "transformers" }, - { name = 
"tree-sitter" }, - { name = "tree-sitter-python" }, -] docs = [ { name = "lxml" }, { name = "unstructured", extra = ["csv", "doc", "docx", "epub", "md", "odt", "org", "pdf", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"] }, ] -huggingface = [ - { name = "transformers" }, -] neo4j = [ { name = "neo4j" }, ] @@ -745,7 +736,7 @@ name = "cognee-mcp" version = "0.4.0" source = { editable = "." } dependencies = [ - { name = "cognee", extra = ["codegraph", "docs", "huggingface", "neo4j", "postgres"] }, + { name = "cognee", extra = ["docs", "neo4j", "postgres"] }, { name = "fastmcp" }, { name = "httpx" }, { name = "mcp" }, @@ -759,7 +750,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "cognee", extras = ["postgres", "codegraph", "huggingface", "docs", "neo4j"], specifier = "==0.3.7" }, + { name = "cognee", extras = ["postgres", "docs", "neo4j"], specifier = "==0.3.7" }, { name = "fastmcp", specifier = ">=2.10.0,<3.0.0" }, { name = "httpx", specifier = ">=0.27.0,<1.0.0" }, { name = "mcp", specifier = ">=1.12.0,<2.0.0" }, @@ -6038,57 +6029,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/2b/4d2708ac1ff5cd708b6548f4c5812d0ae40d1c28591c4c1c762b6dbdef2d/transformers-4.57.0-py3-none-any.whl", hash = "sha256:9d7c6d098c026e40d897e017ed1f481ab803cbac041021dbc6ae6100e4949b55", size = 11990588 }, ] -[[package]] -name = "tree-sitter" -version = "0.24.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a7/a2/698b9d31d08ad5558f8bfbfe3a0781bd4b1f284e89bde3ad18e05101a892/tree-sitter-0.24.0.tar.gz", hash = "sha256:abd95af65ca2f4f7eca356343391ed669e764f37748b5352946f00f7fc78e734", size = 168304 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/08/9a/bd627a02e41671af73222316e1fcf87772c7804dc2fba99405275eb1f3eb/tree_sitter-0.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f3f00feff1fc47a8e4863561b8da8f5e023d382dd31ed3e43cd11d4cae445445", size = 140890 }, - { url = 
"https://files.pythonhosted.org/packages/5b/9b/b1ccfb187f8be78e2116176a091a2f2abfd043a06d78f80c97c97f315b37/tree_sitter-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f9691be48d98c49ef8f498460278884c666b44129222ed6217477dffad5d4831", size = 134413 }, - { url = "https://files.pythonhosted.org/packages/01/39/e25b0042a049eb27e991133a7aa7c49bb8e49a8a7b44ca34e7e6353ba7ac/tree_sitter-0.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:098a81df9f89cf254d92c1cd0660a838593f85d7505b28249216661d87adde4a", size = 560427 }, - { url = "https://files.pythonhosted.org/packages/1c/59/4d132f1388da5242151b90acf32cc56af779bfba063923699ab28b276b62/tree_sitter-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b26bf9e958da6eb7e74a081aab9d9c7d05f9baeaa830dbb67481898fd16f1f5", size = 574327 }, - { url = "https://files.pythonhosted.org/packages/ec/97/3914e45ab9e0ff0f157e493caa91791372508488b97ff0961a0640a37d25/tree_sitter-0.24.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2a84ff87a2f2a008867a1064aba510ab3bd608e3e0cd6e8fef0379efee266c73", size = 577171 }, - { url = "https://files.pythonhosted.org/packages/c5/b0/266a529c3eef171137b73cde8ad7aa282734354609a8b2f5564428e8f12d/tree_sitter-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:c012e4c345c57a95d92ab5a890c637aaa51ab3b7ff25ed7069834b1087361c95", size = 120260 }, - { url = "https://files.pythonhosted.org/packages/c1/c3/07bfaa345e0037ff75d98b7a643cf940146e4092a1fd54eed0359836be03/tree_sitter-0.24.0-cp310-cp310-win_arm64.whl", hash = "sha256:033506c1bc2ba7bd559b23a6bdbeaf1127cee3c68a094b82396718596dfe98bc", size = 108416 }, - { url = "https://files.pythonhosted.org/packages/66/08/82aaf7cbea7286ee2a0b43e9b75cb93ac6ac132991b7d3c26ebe5e5235a3/tree_sitter-0.24.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de0fb7c18c6068cacff46250c0a0473e8fc74d673e3e86555f131c2c1346fb13", size = 140733 }, - { url = 
"https://files.pythonhosted.org/packages/8c/bd/1a84574911c40734d80327495e6e218e8f17ef318dd62bb66b55c1e969f5/tree_sitter-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a7c9c89666dea2ce2b2bf98e75f429d2876c569fab966afefdcd71974c6d8538", size = 134243 }, - { url = "https://files.pythonhosted.org/packages/46/c1/c2037af2c44996d7bde84eb1c9e42308cc84b547dd6da7f8a8bea33007e1/tree_sitter-0.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ddb113e6b8b3e3b199695b1492a47d87d06c538e63050823d90ef13cac585fd", size = 562030 }, - { url = "https://files.pythonhosted.org/packages/4c/aa/2fb4d81886df958e6ec7e370895f7106d46d0bbdcc531768326124dc8972/tree_sitter-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01ea01a7003b88b92f7f875da6ba9d5d741e0c84bb1bd92c503c0eecd0ee6409", size = 575585 }, - { url = "https://files.pythonhosted.org/packages/e3/3c/5f997ce34c0d1b744e0f0c0757113bdfc173a2e3dadda92c751685cfcbd1/tree_sitter-0.24.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:464fa5b2cac63608915a9de8a6efd67a4da1929e603ea86abaeae2cb1fe89921", size = 578203 }, - { url = "https://files.pythonhosted.org/packages/d5/1f/f2bc7fa7c3081653ea4f2639e06ff0af4616c47105dbcc0746137da7620d/tree_sitter-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:3b1f3cbd9700e1fba0be2e7d801527e37c49fc02dc140714669144ef6ab58dce", size = 120147 }, - { url = "https://files.pythonhosted.org/packages/c0/4c/9add771772c4d72a328e656367ca948e389432548696a3819b69cdd6f41e/tree_sitter-0.24.0-cp311-cp311-win_arm64.whl", hash = "sha256:f3f08a2ca9f600b3758792ba2406971665ffbad810847398d180c48cee174ee2", size = 108302 }, - { url = "https://files.pythonhosted.org/packages/e9/57/3a590f287b5aa60c07d5545953912be3d252481bf5e178f750db75572bff/tree_sitter-0.24.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:14beeff5f11e223c37be7d5d119819880601a80d0399abe8c738ae2288804afc", size = 140788 }, - { url = 
"https://files.pythonhosted.org/packages/61/0b/fc289e0cba7dbe77c6655a4dd949cd23c663fd62a8b4d8f02f97e28d7fe5/tree_sitter-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26a5b130f70d5925d67b47db314da209063664585a2fd36fa69e0717738efaf4", size = 133945 }, - { url = "https://files.pythonhosted.org/packages/86/d7/80767238308a137e0b5b5c947aa243e3c1e3e430e6d0d5ae94b9a9ffd1a2/tree_sitter-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fc5c3c26d83c9d0ecb4fc4304fba35f034b7761d35286b936c1db1217558b4e", size = 564819 }, - { url = "https://files.pythonhosted.org/packages/bf/b3/6c5574f4b937b836601f5fb556b24804b0a6341f2eb42f40c0e6464339f4/tree_sitter-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:772e1bd8c0931c866b848d0369b32218ac97c24b04790ec4b0e409901945dd8e", size = 579303 }, - { url = "https://files.pythonhosted.org/packages/0a/f4/bd0ddf9abe242ea67cca18a64810f8af230fc1ea74b28bb702e838ccd874/tree_sitter-0.24.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:24a8dd03b0d6b8812425f3b84d2f4763322684e38baf74e5bb766128b5633dc7", size = 581054 }, - { url = "https://files.pythonhosted.org/packages/8c/1c/ff23fa4931b6ef1bbeac461b904ca7e49eaec7e7e5398584e3eef836ec96/tree_sitter-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9e8b1605ab60ed43803100f067eed71b0b0e6c1fb9860a262727dbfbbb74751", size = 120221 }, - { url = "https://files.pythonhosted.org/packages/b2/2a/9979c626f303177b7612a802237d0533155bf1e425ff6f73cc40f25453e2/tree_sitter-0.24.0-cp312-cp312-win_arm64.whl", hash = "sha256:f733a83d8355fc95561582b66bbea92ffd365c5d7a665bc9ebd25e049c2b2abb", size = 108234 }, - { url = "https://files.pythonhosted.org/packages/61/cd/2348339c85803330ce38cee1c6cbbfa78a656b34ff58606ebaf5c9e83bd0/tree_sitter-0.24.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d4a6416ed421c4210f0ca405a4834d5ccfbb8ad6692d4d74f7773ef68f92071", size = 140781 }, - { url = 
"https://files.pythonhosted.org/packages/8b/a3/1ea9d8b64e8dcfcc0051028a9c84a630301290995cd6e947bf88267ef7b1/tree_sitter-0.24.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e0992d483677e71d5c5d37f30dfb2e3afec2f932a9c53eec4fca13869b788c6c", size = 133928 }, - { url = "https://files.pythonhosted.org/packages/fe/ae/55c1055609c9428a4aedf4b164400ab9adb0b1bf1538b51f4b3748a6c983/tree_sitter-0.24.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57277a12fbcefb1c8b206186068d456c600dbfbc3fd6c76968ee22614c5cd5ad", size = 564497 }, - { url = "https://files.pythonhosted.org/packages/ce/d0/f2ffcd04882c5aa28d205a787353130cbf84b2b8a977fd211bdc3b399ae3/tree_sitter-0.24.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d25fa22766d63f73716c6fec1a31ee5cf904aa429484256bd5fdf5259051ed74", size = 578917 }, - { url = "https://files.pythonhosted.org/packages/af/82/aebe78ea23a2b3a79324993d4915f3093ad1af43d7c2208ee90be9273273/tree_sitter-0.24.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7d5d9537507e1c8c5fa9935b34f320bfec4114d675e028f3ad94f11cf9db37b9", size = 581148 }, - { url = "https://files.pythonhosted.org/packages/a1/b4/6b0291a590c2b0417cfdb64ccb8ea242f270a46ed429c641fbc2bfab77e0/tree_sitter-0.24.0-cp313-cp313-win_amd64.whl", hash = "sha256:f58bb4956917715ec4d5a28681829a8dad5c342cafd4aea269f9132a83ca9b34", size = 120207 }, - { url = "https://files.pythonhosted.org/packages/a8/18/542fd844b75272630229c9939b03f7db232c71a9d82aadc59c596319ea6a/tree_sitter-0.24.0-cp313-cp313-win_arm64.whl", hash = "sha256:23641bd25dcd4bb0b6fa91b8fb3f46cc9f1c9f475efe4d536d3f1f688d1b84c8", size = 108232 }, -] - -[[package]] -name = "tree-sitter-python" -version = "0.23.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1c/30/6766433b31be476fda6569a3a374c2220e45ffee0bff75460038a57bf23b/tree_sitter_python-0.23.6.tar.gz", hash = 
"sha256:354bfa0a2f9217431764a631516f85173e9711af2c13dbd796a8815acfe505d9", size = 155868 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/67/577a02acae5f776007c924ca86ef14c19c12e71de0aa9d2a036f3c248e7b/tree_sitter_python-0.23.6-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:28fbec8f74eeb2b30292d97715e60fac9ccf8a8091ce19b9d93e9b580ed280fb", size = 74361 }, - { url = "https://files.pythonhosted.org/packages/d2/a6/194b3625a7245c532ad418130d63077ce6cd241152524152f533e4d6edb0/tree_sitter_python-0.23.6-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:680b710051b144fedf61c95197db0094f2245e82551bf7f0c501356333571f7a", size = 76436 }, - { url = "https://files.pythonhosted.org/packages/d0/62/1da112689d6d282920e62c40e67ab39ea56463b0e7167bfc5e81818a770e/tree_sitter_python-0.23.6-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a9dcef55507b6567207e8ee0a6b053d0688019b47ff7f26edc1764b7f4dc0a4", size = 112060 }, - { url = "https://files.pythonhosted.org/packages/5d/62/c9358584c96e38318d69b6704653684fd8467601f7b74e88aa44f4e6903f/tree_sitter_python-0.23.6-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29dacdc0cd2f64e55e61d96c6906533ebb2791972bec988450c46cce60092f5d", size = 112338 }, - { url = "https://files.pythonhosted.org/packages/1a/58/c5e61add45e34fb8ecbf057c500bae9d96ed7c9ca36edb7985da8ae45526/tree_sitter_python-0.23.6-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7e048733c36f564b379831689006801feb267d8194f9e793fbb395ef1723335d", size = 109382 }, - { url = "https://files.pythonhosted.org/packages/e9/f3/9b30893cae9b3811fe652dc6f90aaadfda12ae0b2757f5722fc7266f423c/tree_sitter_python-0.23.6-cp39-abi3-win_amd64.whl", hash = "sha256:a24027248399fb41594b696f929f9956828ae7cc85596d9f775e6c239cd0c2be", size = 75904 }, - { url = 
"https://files.pythonhosted.org/packages/87/cb/ce35a65f83a47b510d8a2f1eddf3bdbb0d57aabc87351c8788caf3309f76/tree_sitter_python-0.23.6-cp39-abi3-win_arm64.whl", hash = "sha256:71334371bd73d5fe080aed39fbff49ed8efb9506edebe16795b0c7567ed6a272", size = 73649 }, -] - [[package]] name = "triton" version = "3.5.0" From 23e219b547518d813ecead9097c8801cc160f59a Mon Sep 17 00:00:00 2001 From: vasilije Date: Sat, 25 Oct 2025 11:59:46 +0200 Subject: [PATCH 08/68] add format --- cognee-mcp/src/client.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cognee-mcp/src/client.py b/cognee-mcp/src/client.py index 407a3896b..952503ee7 100755 --- a/cognee-mcp/src/client.py +++ b/cognee-mcp/src/client.py @@ -37,9 +37,7 @@ async def run(): toolResult = await session.call_tool("prune", arguments={}) - toolResult = await session.call_tool( - "cognify", arguments={} - ) + toolResult = await session.call_tool("cognify", arguments={}) toolResult = await session.call_tool( "search", arguments={"search_type": "GRAPH_COMPLETION"} From 9d497adb24ebab9dcbdf0b9faf4d2789b26f7808 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sat, 25 Oct 2025 12:20:49 +0200 Subject: [PATCH 09/68] fixing the docker build --- .github/workflows/dockerhub-mcp.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dockerhub-mcp.yml b/.github/workflows/dockerhub-mcp.yml index 7a23df936..2b160225c 100644 --- a/.github/workflows/dockerhub-mcp.yml +++ b/.github/workflows/dockerhub-mcp.yml @@ -46,7 +46,7 @@ jobs: - name: Build and push id: build - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 with: context: . 
platforms: linux/amd64,linux/arm64 @@ -56,6 +56,8 @@ jobs: labels: ${{ steps.meta.outputs.labels }} cache-from: type=registry,ref=cognee/cognee-mcp:buildcache cache-to: type=registry,ref=cognee/cognee-mcp:buildcache,mode=max + provenance: false + sbom: false - name: Image digest run: echo ${{ steps.build.outputs.digest }} From c6c11d497315ed0890bc0b1eb7ba9cd963626c01 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sat, 25 Oct 2025 12:29:14 +0200 Subject: [PATCH 10/68] added storage check --- .github/workflows/dockerhub-mcp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dockerhub-mcp.yml b/.github/workflows/dockerhub-mcp.yml index 2b160225c..204da8b2f 100644 --- a/.github/workflows/dockerhub-mcp.yml +++ b/.github/workflows/dockerhub-mcp.yml @@ -26,7 +26,7 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 with: - buildkitd-flags: --root /tmp/buildkit + buildkitd-flags: --root /tmp/buildkit --oci-worker-gc --oci-worker-gc-keepstorage=8GB - name: Log in to Docker Hub uses: docker/login-action@v3 From 55d213f17df352050ce933d36f435de88510b5da Mon Sep 17 00:00:00 2001 From: vasilije Date: Sat, 25 Oct 2025 12:47:50 +0200 Subject: [PATCH 11/68] added a new runner --- .github/workflows/dockerhub-mcp.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dockerhub-mcp.yml b/.github/workflows/dockerhub-mcp.yml index 204da8b2f..f615980e7 100644 --- a/.github/workflows/dockerhub-mcp.yml +++ b/.github/workflows/dockerhub-mcp.yml @@ -7,7 +7,10 @@ on: jobs: docker-build-and-push: - runs-on: ubuntu-latest + runs-on: + group: Runners + labels: + - docker_build_runner steps: - name: Check and free disk space before build From f6585102551ef8f07f3dc42d96cde6b28d41b143 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Sat, 25 Oct 2025 12:51:38 +0200 Subject: [PATCH 12/68] Change runner group from 'Runners' to 'Default' --- 
.github/workflows/dockerhub-mcp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dockerhub-mcp.yml b/.github/workflows/dockerhub-mcp.yml index f615980e7..f1656d45b 100644 --- a/.github/workflows/dockerhub-mcp.yml +++ b/.github/workflows/dockerhub-mcp.yml @@ -8,7 +8,7 @@ on: jobs: docker-build-and-push: runs-on: - group: Runners + group: Default labels: - docker_build_runner From 8963e8c57cccb0b68c89a586120c47ab6f479872 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Sat, 25 Oct 2025 13:25:16 +0200 Subject: [PATCH 13/68] Correct typo in installation section of README Fixed a typo in the installation instructions. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 305bffdfe..d51a380b1 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ Hosted platform: ### 📦 Installation -You can install Cognee using either **pip**, **poetry**, **uv** or any other python package manager. +You can install Cognee using either **pip**, **poetry**, **uv** or any other python package manager.. Cognee supports Python 3.10 to 3.12 From 7f6f7bbc70f6d034dfacf3a5bc7d834122245ca9 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Sat, 25 Oct 2025 13:40:21 +0200 Subject: [PATCH 14/68] Simplify Docker Buildx setup in workflow Removed unnecessary buildkitd-flags and provenance options. 
--- .github/workflows/dockerhub-mcp.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/dockerhub-mcp.yml b/.github/workflows/dockerhub-mcp.yml index f1656d45b..5a1f28296 100644 --- a/.github/workflows/dockerhub-mcp.yml +++ b/.github/workflows/dockerhub-mcp.yml @@ -29,7 +29,7 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 with: - buildkitd-flags: --root /tmp/buildkit --oci-worker-gc --oci-worker-gc-keepstorage=8GB + buildkitd-flags: --root /tmp/buildkit - name: Log in to Docker Hub uses: docker/login-action@v3 @@ -59,8 +59,7 @@ jobs: labels: ${{ steps.meta.outputs.labels }} cache-from: type=registry,ref=cognee/cognee-mcp:buildcache cache-to: type=registry,ref=cognee/cognee-mcp:buildcache,mode=max - provenance: false - sbom: false + - name: Image digest run: echo ${{ steps.build.outputs.digest }} From 0e62c4f1172384e339071ebe918335fbaf11a4b1 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 26 Oct 2025 10:11:52 +0100 Subject: [PATCH 15/68] added ability to config logs --- cognee/base_config.py | 10 +++++ cognee/shared/logging_utils.py | 71 +++++++++++++++++++++++++++++----- 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/cognee/base_config.py b/cognee/base_config.py index a2ad06249..285f30699 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -11,6 +11,7 @@ class BaseConfig(BaseSettings): data_root_directory: str = get_absolute_path(".data_storage") system_root_directory: str = get_absolute_path(".cognee_system") cache_root_directory: str = get_absolute_path(".cognee_cache") + logs_root_directory: str = os.getenv("COGNEE_LOGS_DIR", "/tmp/cognee_logs") monitoring_tool: object = Observer.NONE @pydantic.model_validator(mode="after") @@ -30,6 +31,14 @@ class BaseConfig(BaseSettings): # Require absolute paths for root directories self.data_root_directory = ensure_absolute_path(self.data_root_directory) self.system_root_directory = 
ensure_absolute_path(self.system_root_directory) + # logs_root_directory may be outside project root; keep as-is if absolute or make absolute if relative + try: + if not os.path.isabs(self.logs_root_directory): + # If relative, place under current working directory + self.logs_root_directory = os.path.abspath(self.logs_root_directory) + except Exception: + # If anything goes wrong, fall back to /tmp/cognee_logs + self.logs_root_directory = "/tmp/cognee_logs" # Set monitoring tool based on available keys if self.langfuse_public_key and self.langfuse_secret_key: self.monitoring_tool = Observer.LANGFUSE @@ -49,6 +58,7 @@ class BaseConfig(BaseSettings): "system_root_directory": self.system_root_directory, "monitoring_tool": self.monitoring_tool, "cache_root_directory": self.cache_root_directory, + "logs_root_directory": self.logs_root_directory, } diff --git a/cognee/shared/logging_utils.py b/cognee/shared/logging_utils.py index 6d160446e..b4a501216 100644 --- a/cognee/shared/logging_utils.py +++ b/cognee/shared/logging_utils.py @@ -76,9 +76,49 @@ log_levels = { # Track if structlog logging has been configured _is_structlog_configured = False -# Path to logs directory -LOGS_DIR = Path(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "logs")) -LOGS_DIR.mkdir(exist_ok=True) # Create logs dir if it doesn't exist +# Logging directory resolution +# Default writable location for most Unix-based systems +DEFAULT_LOGS_DIR = "/tmp/cognee_logs" + + +def _resolve_logs_dir(): + """Resolve a writable logs directory. + + Priority: + 1) BaseConfig.logs_root_directory (respects COGNEE_LOGS_DIR) + 2) /tmp/cognee_logs (default, best-effort create) + 3) ./logs in current working directory (last resort) + + Returns a Path or None if none are writable/creatable. 
+ """ + candidate_paths = [] + + # Prefer configuration from BaseConfig + try: + from cognee.base_config import get_base_config + + base_config = get_base_config() + if getattr(base_config, "logs_root_directory", None): + candidate_paths.append(Path(base_config.logs_root_directory)) + except Exception: + # If base config is unavailable during early imports, fall back to env + env_dir = os.environ.get("COGNEE_LOGS_DIR") + if env_dir: + candidate_paths.append(Path(env_dir)) + candidate_paths.append(Path(DEFAULT_LOGS_DIR)) + candidate_paths.append(Path.cwd() / "logs") + + for candidate in candidate_paths: + try: + candidate.mkdir(parents=True, exist_ok=True) + if os.access(candidate, os.W_OK): + return candidate + except Exception: + # Try next candidate + continue + + return None + # Maximum number of log files to keep MAX_LOG_FILES = 10 @@ -430,27 +470,37 @@ def setup_logging(log_level=None, name=None): stream_handler.setFormatter(console_formatter) stream_handler.setLevel(log_level) + # Resolve logs directory with env and safe fallbacks + logs_dir = _resolve_logs_dir() + # Check if we already have a log file path from the environment # NOTE: environment variable must be used here as it allows us to # log to a single file with a name based on a timestamp in a multiprocess setting. # Without it, we would have a separate log file for every process. 
log_file_path = os.environ.get("LOG_FILE_NAME") - if not log_file_path: + if not log_file_path and logs_dir is not None: # Create a new log file name with the cognee start time start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - log_file_path = os.path.join(LOGS_DIR, f"{start_time}.log") + log_file_path = str((logs_dir / f"{start_time}.log").resolve()) os.environ["LOG_FILE_NAME"] = log_file_path - # Create a file handler that uses our custom PlainFileHandler - file_handler = PlainFileHandler(log_file_path, encoding="utf-8") - file_handler.setLevel(DEBUG) + # Create a file handler that uses our custom PlainFileHandler if possible + file_handler = None + if log_file_path: + try: + file_handler = PlainFileHandler(log_file_path, encoding="utf-8") + file_handler.setLevel(DEBUG) + except Exception: + # If file handler cannot be created, fall back to console-only logging + file_handler = None # Configure root logger root_logger = logging.getLogger() if root_logger.hasHandlers(): root_logger.handlers.clear() root_logger.addHandler(stream_handler) - root_logger.addHandler(file_handler) + if file_handler is not None: + root_logger.addHandler(file_handler) root_logger.setLevel(log_level) if log_level > logging.DEBUG: @@ -466,7 +516,8 @@ def setup_logging(log_level=None, name=None): ) # Clean up old log files, keeping only the most recent ones - cleanup_old_logs(LOGS_DIR, MAX_LOG_FILES) + if logs_dir is not None: + cleanup_old_logs(logs_dir, MAX_LOG_FILES) # Mark logging as configured _is_structlog_configured = True From 3cd49ae9dd521c242d0a9c8b1de1d8a4784c3bf1 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Sun, 26 Oct 2025 11:18:17 +0100 Subject: [PATCH 16/68] Add repository guidelines to AGENTS.md This document provides guidelines for working with the cognee repository, including project structure, development commands, coding style, testing, and commit practices. 
--- AGENTS.md | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..4cadfbdc1 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,132 @@ +## Repository Guidelines + +This document summarizes how to work with the cognee repository: how it’s organized, how to build, test, lint, and contribute. It mirrors our actual tooling and CI while providing quick commands for local development. + +## Project Structure & Module Organization + +- `cognee/`: Core Python library and API. + - `api/`: FastAPI application and versioned routers (add, cognify, memify, search, delete, users, datasets, responses, visualize, settings, sync, update, checks). + - `cli/`: CLI entry points and subcommands invoked via `cognee` / `cognee-cli`. + - `infrastructure/`: Databases, LLM providers, embeddings, loaders, and storage adapters. + - `modules/`: Domain logic (graph, retrieval, ontology, users, processing, observability, etc.). + - `tasks/`: Reusable tasks (e.g., code graph, web scraping, storage). Extend with new tasks here. + - `eval_framework/`: Evaluation utilities and adapters. + - `shared/`: Cross-cutting helpers (logging, settings, utils). + - `tests/`: Unit, integration, CLI, and end-to-end tests organized by feature. + - `__main__.py`: Entrypoint to route to CLI. +- `cognee-mcp/`: Model Context Protocol server exposing cognee as MCP tools (SSE/HTTP/stdio). Contains its own README and Dockerfile. +- `cognee-frontend/`: Next.js UI for local development and demos. +- `distributed/`: Utilities for distributed execution (Modal, workers, queues). +- `examples/`: Example scripts demonstrating the public APIs and features (graph, code graph, multimodal, permissions, etc.). +- `notebooks/`: Jupyter notebooks for demos and tutorials. +- `alembic/`: Database migrations for relational backends. 
+ +Notes: +- Co-locate feature-specific helpers under their respective package (`modules/`, `infrastructure/`, or `tasks/`). +- Extend the system by adding new tasks, loaders, or retrievers rather than modifying core pipeline mechanisms. + +## Build, Test, and Development Commands + +Python (root) – requires Python >= 3.10 and < 3.14. We recommend `uv` for speed and reproducibility. + +- Create/refresh env and install dev deps: +```bash +uv sync --dev --all-extras --reinstall +``` + +- Run the CLI (examples): +```bash +uv run cognee-cli add "Cognee turns documents into AI memory." +uv run cognee-cli cognify +uv run cognee-cli search "What does cognee do?" +uv run cognee-cli -ui # Launches UI, backend API, and MCP server together +``` + +- Start the FastAPI server directly: +```bash +uv run python -m cognee.api.client +``` + +- Run tests (CI mirrors these commands): +```bash +uv run pytest cognee/tests/unit/ -v +uv run pytest cognee/tests/integration/ -v +``` + +- Lint and format (ruff): +```bash +uv run ruff check . +uv run ruff format . 
+``` + +- Optional static type checks (mypy): +```bash +uv run mypy cognee/ +``` + +MCP Server (`cognee-mcp/`): + +- Install and run locally: +```bash +cd cognee-mcp +uv sync --dev --all-extras --reinstall +uv run python src/server.py # stdio (default) +uv run python src/server.py --transport sse +uv run python src/server.py --transport http --host 127.0.0.1 --port 8000 --path /mcp +``` + +- API Mode (connect to a running Cognee API): +```bash +uv run python src/server.py --transport sse --api-url http://localhost:8000 --api-token YOUR_TOKEN +``` + +- Docker quickstart (examples): see `cognee-mcp/README.md` for full details +```bash +docker run -e TRANSPORT_MODE=http --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main +``` + +Frontend (`cognee-frontend/`): +```bash +cd cognee-frontend +npm install +npm run dev # Next.js dev server +npm run lint # ESLint +npm run build && npm start +``` + +## Coding Style & Naming Conventions + +Python: +- 4-space indentation, modules and functions in `snake_case`, classes in `PascalCase`. +- Public APIs should be type-annotated where practical. +- Use `ruff format` before committing; `ruff check` enforces import hygiene and style (line-length 100 configured in `pyproject.toml`). +- Prefer explicit, structured error handling. Use shared logging utilities in `cognee.shared.logging_utils`. + +MCP server and Frontend: +- Follow the local `README.md` and ESLint/TypeScript configuration in `cognee-frontend/`. + +## Testing Guidelines + +- Place Python tests under `cognee/tests/`. + - Unit tests: `cognee/tests/unit/` + - Integration tests: `cognee/tests/integration/` + - CLI tests: `cognee/tests/cli_tests/` +- Name test files `test_*.py`. Use `pytest.mark.asyncio` for async tests. +- Avoid external state; rely on test fixtures and the CI-provided env vars when LLM/embedding providers are required. See CI workflows under `.github/workflows/` for expected environment variables. 
+- When adding public APIs, provide/update targeted examples under `examples/python/`. + +## Commit & Pull Request Guidelines + +- Use clear, imperative subjects (≤ 72 chars) and conventional commit styling in PR titles. Our CI validates semantic PR titles (see `.github/workflows/pr_lint`). Examples: + - `feat(graph): add temporal edge weighting` + - `fix(api): handle missing auth cookie` + - `docs: update installation instructions` +- Reference related issues/discussions in the PR body and provide brief context. +- PRs should describe scope, list local test commands run, and mention any impacts on MCP server or UI if applicable. +- Sign commits and affirm the DCO (see `CONTRIBUTING.md`). + +## CI Mirrors Local Commands + +Our GitHub Actions run the same ruff checks and pytest suites shown above (`.github/workflows/basic_tests.yml` and related workflows). Use the commands in this document locally to minimize CI surprises. + + From 829f775577b3cf009c269ace9658fdbd511417ad Mon Sep 17 00:00:00 2001 From: chinu0609 Date: Mon, 27 Oct 2025 14:04:13 +0530 Subject: [PATCH 17/68] fix: change in the endpoint of embed --- .env.template | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.env.template b/.env.template index ec95e2876..7dcd4f346 100644 --- a/.env.template +++ b/.env.template @@ -242,15 +242,12 @@ LITELLM_LOG="ERROR" ########## Local LLM via Ollama ############################################### + #LLM_API_KEY ="ollama" #LLM_MODEL="llama3.1:8b" #LLM_PROVIDER="ollama" #LLM_ENDPOINT="http://localhost:11434/v1" #EMBEDDING_PROVIDER="ollama" -#EMBEDDING_MODEL="avr/sfr-embedding-mistral:latest" -#EMBEDDING_ENDPOINT="http://localhost:11434/api/embed" -#EMBEDDING_DIMENSIONS=4096 -#HUGGINGFACE_TOKENIZER="Salesforce/SFR-Embedding-Mistral" #EMBEDDING_MODEL="nomic-embed-text:latest" #EMBEDDING_ENDPOINT="http://localhost:11434/api/embed" #EMBEDDING_DIMENSIONS=768 From 897fbd2f09abfc1c3c5cc30fc2fcf17ed549ae80 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic 
Date: Mon, 27 Oct 2025 15:42:09 +0100 Subject: [PATCH 18/68] load test now uses s3 bucket --- cognee/tests/test_load.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index 09e2db084..f8d007d28 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -17,7 +17,9 @@ async def process_and_search(num_of_searches): await asyncio.gather( *[ - cognee.search(query_text="Tell me about AI", query_type=SearchType.GRAPH_COMPLETION) + cognee.search( + query_text="Tell me about the document", query_type=SearchType.GRAPH_COMPLETION + ) for _ in range(num_of_searches) ] ) @@ -28,9 +30,6 @@ async def process_and_search(num_of_searches): async def main(): - file_path = os.path.join( - pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" - ) data_directory_path = str( pathlib.Path( os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") @@ -52,8 +51,8 @@ async def main(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - for i in range(num_of_pdfs): - await cognee.add(file_path, dataset_name=f"dataset_{i}") + s3_input = "s3://cognee-load-test-s3-bucket" + await cognee.add(s3_input) recorded_times = await asyncio.gather( *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] From 2b083dd0f110e44341d30a6228abb18591cfabac Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 28 Oct 2025 09:27:33 +0100 Subject: [PATCH 19/68] small changes to load test --- cognee/tests/test_load.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index f8d007d28..a09ce053d 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -48,15 +48,15 @@ async def main(): upper_boundary_minutes = 10 average_minutes = 8 - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) + recorded_times = [] + 
for _ in range(num_of_reps): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) - s3_input = "s3://cognee-load-test-s3-bucket" - await cognee.add(s3_input) + s3_input = "s3://cognee-test-load-s3-bucket" + await cognee.add(s3_input) - recorded_times = await asyncio.gather( - *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] - ) + recorded_times.append(await process_and_search(num_of_pdfs)) average_recorded_time = sum(recorded_times) / len(recorded_times) From 15a03153753e907c02629d85b6e5c04c7ba13e0a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 28 Oct 2025 14:35:08 +0100 Subject: [PATCH 20/68] refactor: Remove LanceDB fallback --- .../databases/vector/create_vector_engine.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 639bbb9f6..d1cf855d7 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -47,7 +47,7 @@ def create_vector_engine( embedding_engine=embedding_engine, ) - if vector_db_provider == "pgvector": + if vector_db_provider.lower() == "pgvector": from cognee.infrastructure.databases.relational import get_relational_config # Get configuration for postgres database @@ -78,7 +78,7 @@ def create_vector_engine( embedding_engine, ) - elif vector_db_provider == "chromadb": + elif vector_db_provider.lower() == "chromadb": try: import chromadb except ImportError: @@ -94,7 +94,7 @@ def create_vector_engine( embedding_engine=embedding_engine, ) - elif vector_db_provider == "neptune_analytics": + elif vector_db_provider.lower() == "neptune_analytics": try: from langchain_aws import NeptuneAnalyticsGraph except ImportError: @@ -122,7 +122,7 @@ def create_vector_engine( embedding_engine=embedding_engine, ) - else: + elif vector_db_provider.lower() == "lancedb": 
from .lancedb.LanceDBAdapter import LanceDBAdapter return LanceDBAdapter( @@ -130,3 +130,9 @@ def create_vector_engine( api_key=vector_db_key, embedding_engine=embedding_engine, ) + + else: + raise EnvironmentError( + f"Unsupported vector database provider: {vector_db_provider}. " + f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}" + ) From 221a0dba0158a2df88e713935d29be39c6e5b29d Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Tue, 28 Oct 2025 15:27:29 +0100 Subject: [PATCH 21/68] feature: adds the concept of now to the qa for temporal queries (#1685) ## Description Adds the concept of now to the qa for temporal queries ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [x] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
--- .../llm/prompts/extract_query_time.txt | 28 +++++++++---------- .../modules/retrieval/temporal_retriever.py | 10 ++++--- examples/python/temporal_example.py | 1 + 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/cognee/infrastructure/llm/prompts/extract_query_time.txt b/cognee/infrastructure/llm/prompts/extract_query_time.txt index 763d0e1c4..ce78c3471 100644 --- a/cognee/infrastructure/llm/prompts/extract_query_time.txt +++ b/cognee/infrastructure/llm/prompts/extract_query_time.txt @@ -1,15 +1,13 @@ -For the purposes of identifying timestamps in a query, you are tasked with extracting relevant timestamps from the query. -## Timestamp requirements -- If the query contains interval extrack both starts_at and ends_at properties -- If the query contains an instantaneous timestamp, starts_at and ends_at should be the same -- If the query its open-ended (before 2009 or after 2009), the corresponding non defined end of the time should be none - -For example: "before 2009" -- starts_at: None, ends_at: 2009 or "after 2009" -- starts_at: 2009, ends_at: None -- Put always the data that comes first in time as starts_at and the timestamps that comes second in time as ends_at -- If starts_at or ends_at cannot be extracted both of them has to be None -## Output Format -Your reply should be a JSON: list of dictionaries with the following structure: -```python -class QueryInterval(BaseModel): - starts_at: Optional[Timestamp] = None - ends_at: Optional[Timestamp] = None -``` \ No newline at end of file +You are tasked with identifying relevant time periods where the answer to a given query should be searched. +Current date is: `{{ time_now }}`. Determine relevant period(s) and return structured intervals. + +Extraction rules: + +1. Query without specific timestamp: use the time period with starts_at set to None and ends_at set to now. +2. 
Explicit time intervals: If the query specifies a range (e.g., from 2010 to 2020, between January and March 2023), extract both start and end dates. Always assign the earlier date to starts_at and the later date to ends_at. +3. Single timestamp: If the query refers to one specific moment (e.g., in 2015, on March 5, 2022), set starts_at and ends_at to that same timestamp. +4. Open-ended time references: For phrases such as "before X" or "after X", represent the unspecified side as None. For example: before 2009 → starts_at: None, ends_at: 2009; after 2009 → starts_at: 2009, ends_at: None. +5. Current-time references ("now", "current", "today"): If the query explicitly refers to the present, set both starts_at and ends_at to now (the current date given above). +6. "Who is" and "Who was" questions: These imply a general identity or biographical inquiry without a specific temporal scope. Set both starts_at and ends_at to None. +7. Ordering rule: Always ensure the earlier date is assigned to starts_at and the later date to ends_at. +8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
\ No newline at end of file diff --git a/cognee/modules/retrieval/temporal_retriever.py b/cognee/modules/retrieval/temporal_retriever.py index 8ef5eed69..ec68d37bb 100644 --- a/cognee/modules/retrieval/temporal_retriever.py +++ b/cognee/modules/retrieval/temporal_retriever.py @@ -1,7 +1,7 @@ import os import asyncio from typing import Any, Optional, List, Type - +from datetime import datetime from operator import itemgetter from cognee.infrastructure.databases.vector import get_vector_engine @@ -79,7 +79,11 @@ class TemporalRetriever(GraphCompletionRetriever): else: base_directory = None - system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory) + time_now = datetime.now().strftime("%d-%m-%Y") + + system_prompt = render_prompt( + prompt_path, {"time_now": time_now}, base_directory=base_directory + ) interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval) @@ -108,8 +112,6 @@ class TemporalRetriever(GraphCompletionRetriever): graph_engine = await get_graph_engine() - triplets = [] - if time_from and time_to: ids = await graph_engine.collect_time_ids(time_from=time_from, time_to=time_to) elif time_from: diff --git a/examples/python/temporal_example.py b/examples/python/temporal_example.py index c79e3c1db..f5e7d4a9a 100644 --- a/examples/python/temporal_example.py +++ b/examples/python/temporal_example.py @@ -77,6 +77,7 @@ async def main(): "What happened between 2000 and 2006?", "What happened between 1903 and 1995, I am interested in the Selected Works of Arnulf Øverland Ole Peter Arnulf Øverland?", "Who is Attaphol Buspakom Attaphol Buspakom?", + "Who was Arnulf Øverland?", ] for query_text in queries: From 8efd152f4cb806cc470007e290877bb7ff540cf0 Mon Sep 17 00:00:00 2001 From: vasilije Date: Tue, 28 Oct 2025 16:03:43 +0100 Subject: [PATCH 22/68] fix network v --- .../cognee_network_visualization.py | 423 ++++++++++++++++-- 1 file changed, 394 insertions(+), 29 deletions(-) diff --git 
a/cognee/modules/visualization/cognee_network_visualization.py b/cognee/modules/visualization/cognee_network_visualization.py index c735e70f1..dd172b1f3 100644 --- a/cognee/modules/visualization/cognee_network_visualization.py +++ b/cognee/modules/visualization/cognee_network_visualization.py @@ -16,17 +16,17 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = nodes_list = [] color_map = { - "Entity": "#f47710", - "EntityType": "#6510f4", - "DocumentChunk": "#801212", - "TextSummary": "#1077f4", - "TableRow": "#f47710", - "TableType": "#6510f4", - "ColumnValue": "#13613a", - "SchemaTable": "#f47710", - "DatabaseSchema": "#6510f4", - "SchemaRelationship": "#13613a", - "default": "#D3D3D3", + "Entity": "#5C10F4", + "EntityType": "#A550FF", + "DocumentChunk": "#0DFF00", + "TextSummary": "#5C10F4", + "TableRow": "#A550FF", + "TableType": "#5C10F4", + "ColumnValue": "#757470", + "SchemaTable": "#A550FF", + "DatabaseSchema": "#5C10F4", + "SchemaRelationship": "#323332", + "default": "#D8D8D8", } for node_id, node_info in nodes_data: @@ -98,16 +98,19 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = +
+
Hover a node or edge to inspect details
@@ -305,8 +666,12 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = """ - html_content = html_template.replace("{nodes}", json.dumps(nodes_list)) - html_content = html_content.replace("{links}", json.dumps(links_list)) + # Safely embed JSON inside