diff --git a/.env.template b/.env.template
index f3168e6e2..7fd3ba9e8 100644
--- a/.env.template
+++ b/.env.template
@@ -16,7 +16,7 @@
STRUCTURED_OUTPUT_FRAMEWORK="instructor"
LLM_API_KEY="your_api_key"
-LLM_MODEL="openai/gpt-4o-mini"
+LLM_MODEL="openai/gpt-5-mini"
LLM_PROVIDER="openai"
LLM_ENDPOINT=""
LLM_API_VERSION=""
@@ -30,10 +30,13 @@ EMBEDDING_DIMENSIONS=3072
EMBEDDING_MAX_TOKENS=8191
# If embedding key is not provided same key set for LLM_API_KEY will be used
#EMBEDDING_API_KEY="your_api_key"
+# Note: OpenAI supports up to 2048 elements and Gemini supports a maximum of 100 elements in an embedding batch,
+# Cognee sets the optimal batch size for OpenAI and Gemini, but a custom size can be defined if necessary for other models
+#EMBEDDING_BATCH_SIZE=2048
# If using BAML structured output these env variables will be used
BAML_LLM_PROVIDER=openai
-BAML_LLM_MODEL="gpt-4o-mini"
+BAML_LLM_MODEL="gpt-5-mini"
BAML_LLM_ENDPOINT=""
BAML_LLM_API_KEY="your_api_key"
BAML_LLM_API_VERSION=""
@@ -52,18 +55,18 @@ BAML_LLM_API_VERSION=""
################################################################################
# Configure storage backend (local filesystem or S3)
# STORAGE_BACKEND="local" # Default: uses local filesystem
-#
+#
# -- To switch to S3 storage, uncomment and fill these: ---------------------
# STORAGE_BACKEND="s3"
# STORAGE_BUCKET_NAME="your-bucket-name"
# AWS_REGION="us-east-1"
# AWS_ACCESS_KEY_ID="your-access-key"
# AWS_SECRET_ACCESS_KEY="your-secret-key"
-#
+#
# -- S3 Root Directories (optional) -----------------------------------------
# DATA_ROOT_DIRECTORY="s3://your-bucket/cognee/data"
# SYSTEM_ROOT_DIRECTORY="s3://your-bucket/cognee/system"
-#
+#
# -- Cache Directory (auto-configured for S3) -------------------------------
# When STORAGE_BACKEND=s3, cache automatically uses S3: s3://BUCKET/cognee/cache
# To override the automatic S3 cache location, uncomment:
@@ -203,6 +206,16 @@ LITELLM_LOG="ERROR"
# DEFAULT_USER_EMAIL=""
# DEFAULT_USER_PASSWORD=""
+################################################################################
+# 📂 AWS Settings
+################################################################################
+
+#AWS_REGION=""
+#AWS_ENDPOINT_URL=""
+#AWS_ACCESS_KEY_ID=""
+#AWS_SECRET_ACCESS_KEY=""
+#AWS_SESSION_TOKEN=""
+
------------------------------- END OF POSSIBLE SETTINGS -------------------------------
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 7705a51eb..5a6d7b124 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -58,7 +58,7 @@ body:
- Python version: [e.g. 3.9.0]
- Cognee version: [e.g. 0.1.0]
- LLM Provider: [e.g. OpenAI, Ollama]
- - Database: [e.g. Neo4j, FalkorDB]
+ - Database: [e.g. Neo4j]
validations:
required: true
diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml
index e46a42edb..1326f2d81 100644
--- a/.github/actions/cognee_setup/action.yml
+++ b/.github/actions/cognee_setup/action.yml
@@ -41,4 +41,4 @@ runs:
EXTRA_ARGS="$EXTRA_ARGS --extra $extra"
done
fi
- uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS
+ uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 76ff5965c..0e6f74188 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,8 +1,8 @@
## Description
-
@@ -16,15 +16,6 @@ DO NOT use AI-generated descriptions. We want to understand your thought process
- [ ] Performance improvement
- [ ] Other (please specify):
-## Changes Made
-
--
--
--
-
-## Testing
-
-
## Screenshots/Videos (if applicable)
@@ -40,11 +31,5 @@ DO NOT use AI-generated descriptions. We want to understand your thought process
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages
-## Related Issues
-
-
-## Additional Notes
-
-
## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
diff --git a/.github/workflows/db_examples_tests.yml b/.github/workflows/db_examples_tests.yml
index a1a81b4d4..51ac9a82a 100644
--- a/.github/workflows/db_examples_tests.yml
+++ b/.github/workflows/db_examples_tests.yml
@@ -54,6 +54,10 @@ jobs:
with:
python-version: ${{ inputs.python-version }}
+ - name: Setup Neo4j with GDS
+ uses: ./.github/actions/setup_neo4j
+ id: neo4j
+
- name: Run Neo4j Example
env:
ENV: dev
@@ -66,9 +70,9 @@ jobs:
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
GRAPH_DATABASE_PROVIDER: "neo4j"
- GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
- GRAPH_DATABASE_USERNAME: "neo4j"
- GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
+ GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
+ GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
+ GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
run: |
uv run python examples/database_examples/neo4j_example.py
diff --git a/.github/workflows/distributed_test.yml b/.github/workflows/distributed_test.yml
new file mode 100644
index 000000000..57bbb7459
--- /dev/null
+++ b/.github/workflows/distributed_test.yml
@@ -0,0 +1,73 @@
+name: Distributed Cognee test with Modal
+permissions:
+ contents: read
+on:
+ workflow_call:
+ inputs:
+ python-version:
+ required: false
+ type: string
+ default: '3.11.x'
+ secrets:
+ LLM_MODEL:
+ required: true
+ LLM_ENDPOINT:
+ required: true
+ LLM_API_KEY:
+ required: true
+ LLM_API_VERSION:
+ required: true
+ EMBEDDING_MODEL:
+ required: true
+ EMBEDDING_ENDPOINT:
+ required: true
+ EMBEDDING_API_KEY:
+ required: true
+ EMBEDDING_API_VERSION:
+ required: true
+ OPENAI_API_KEY:
+ required: true
+
+jobs:
+ run-server-start-test:
+ name: Distributed Cognee test (Modal)
+ runs-on: ubuntu-22.04
+ steps:
+ - name: Check out
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Cognee Setup
+ uses: ./.github/actions/cognee_setup
+ with:
+ python-version: '3.11.x'
+ extra-dependencies: "distributed postgres"
+
+ - name: Run Distributed Cognee (Modal)
+ env:
+ ENV: 'dev'
+ LLM_MODEL: ${{ secrets.LLM_MODEL }}
+ LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+ LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+ LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+ EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+ EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+ EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+ EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+ MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+ MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+ MODAL_SECRET_NAME: ${{ secrets.MODAL_SECRET_NAME }}
+ GRAPH_DATABASE_PROVIDER: "neo4j"
+ GRAPH_DATABASE_URL: ${{ secrets.AZURE_NEO4j_URL }}
+ GRAPH_DATABASE_USERNAME: ${{ secrets.AZURE_NEO4J_USERNAME }}
+ GRAPH_DATABASE_PASSWORD: ${{ secrets.AZURE_NEO4J_PW }}
+ DB_PROVIDER: "postgres"
+ DB_NAME: ${{ secrets.AZURE_POSTGRES_DB_NAME }}
+ DB_HOST: ${{ secrets.AZURE_POSTGRES_HOST }}
+ DB_PORT: ${{ secrets.AZURE_POSTGRES_PORT }}
+ DB_USERNAME: ${{ secrets.AZURE_POSTGRES_USERNAME }}
+ DB_PASSWORD: ${{ secrets.AZURE_POSTGRES_PW }}
+ VECTOR_DB_PROVIDER: "pgvector"
+ COGNEE_DISTRIBUTED: "true"
+ run: uv run modal run ./distributed/entrypoint.py
diff --git a/.github/workflows/examples_tests.yml b/.github/workflows/examples_tests.yml
index f4167a57a..4eaaa0386 100644
--- a/.github/workflows/examples_tests.yml
+++ b/.github/workflows/examples_tests.yml
@@ -1,5 +1,8 @@
name: Reusable Examples Tests
+permissions:
+ contents: read
+
on:
workflow_call:
@@ -131,3 +134,53 @@ jobs:
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./examples/python/memify_coding_agent_example.py
+
+ test-permissions-example:
+ name: Run Permissions Example
+ runs-on: ubuntu-22.04
+ steps:
+ - name: Check out repository
+ uses: actions/checkout@v4
+
+ - name: Cognee Setup
+ uses: ./.github/actions/cognee_setup
+ with:
+ python-version: '3.11.x'
+
+ - name: Run Memify Tests
+ env:
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ LLM_MODEL: ${{ secrets.LLM_MODEL }}
+ LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+ LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+ LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+ EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+ EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+ EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+ EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+ run: uv run python ./examples/python/permissions_example.py
+ test_docling_add:
+ name: Run Add with Docling Test
+ runs-on: macos-15
+ steps:
+ - name: Check out repository
+ uses: actions/checkout@v4
+
+ - name: Cognee Setup
+ uses: ./.github/actions/cognee_setup
+ with:
+ python-version: '3.11.x'
+ extra-dependencies: 'docling'
+
+ - name: Run Docling Test
+ env:
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ LLM_MODEL: ${{ secrets.LLM_MODEL }}
+ LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+ LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+ LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+ EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+ EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+ EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+ EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+ run: uv run python ./cognee/tests/test_add_docling_document.py
diff --git a/.github/workflows/graph_db_tests.yml b/.github/workflows/graph_db_tests.yml
index e7e5a11b8..b07f6232f 100644
--- a/.github/workflows/graph_db_tests.yml
+++ b/.github/workflows/graph_db_tests.yml
@@ -71,6 +71,10 @@ jobs:
with:
python-version: ${{ inputs.python-version }}
+ - name: Setup Neo4j with GDS
+ uses: ./.github/actions/setup_neo4j
+ id: neo4j
+
- name: Run default Neo4j
env:
ENV: 'dev'
@@ -83,9 +87,9 @@ jobs:
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
GRAPH_DATABASE_PROVIDER: "neo4j"
- GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
- GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
- GRAPH_DATABASE_USERNAME: "neo4j"
+ GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
+ GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
+ GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
run: uv run python ./cognee/tests/test_neo4j.py
- name: Run Weighted Edges Tests with Neo4j
diff --git a/.github/workflows/relational_db_migration_tests.yml b/.github/workflows/relational_db_migration_tests.yml
index 26fd7e150..6a37e308e 100644
--- a/.github/workflows/relational_db_migration_tests.yml
+++ b/.github/workflows/relational_db_migration_tests.yml
@@ -186,6 +186,10 @@ jobs:
python-version: '3.11.x'
extra-dependencies: "postgres"
+ - name: Setup Neo4j with GDS
+ uses: ./.github/actions/setup_neo4j
+ id: neo4j
+
- name: Install specific db dependency
run: echo "Dependencies already installed in setup"
@@ -206,9 +210,9 @@ jobs:
env:
ENV: 'dev'
GRAPH_DATABASE_PROVIDER: "neo4j"
- GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
- GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
- GRAPH_DATABASE_USERNAME: "neo4j"
+ GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
+ GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
+ GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
LLM_PROVIDER: openai
LLM_MODEL: ${{ secrets.LLM_MODEL }}
diff --git a/.github/workflows/search_db_tests.yml b/.github/workflows/search_db_tests.yml
index 0b749526d..e3e46dd97 100644
--- a/.github/workflows/search_db_tests.yml
+++ b/.github/workflows/search_db_tests.yml
@@ -51,20 +51,6 @@ jobs:
name: Search test for Neo4j/LanceDB/Sqlite
runs-on: ubuntu-22.04
if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j/lance/sqlite') }}
- services:
- neo4j:
- image: neo4j:5.11
- env:
- NEO4J_AUTH: neo4j/pleaseletmein
- NEO4J_PLUGINS: '["apoc","graph-data-science"]'
- ports:
- - 7474:7474
- - 7687:7687
- options: >-
- --health-cmd="cypher-shell -u neo4j -p pleaseletmein 'RETURN 1'"
- --health-interval=10s
- --health-timeout=5s
- --health-retries=5
steps:
- name: Check out
@@ -77,6 +63,10 @@ jobs:
with:
python-version: ${{ inputs.python-version }}
+ - name: Setup Neo4j with GDS
+ uses: ./.github/actions/setup_neo4j
+ id: neo4j
+
- name: Dependencies already installed
run: echo "Dependencies already installed in setup"
@@ -94,9 +84,9 @@ jobs:
GRAPH_DATABASE_PROVIDER: 'neo4j'
VECTOR_DB_PROVIDER: 'lancedb'
DB_PROVIDER: 'sqlite'
- GRAPH_DATABASE_URL: bolt://localhost:7687
- GRAPH_DATABASE_USERNAME: neo4j
- GRAPH_DATABASE_PASSWORD: pleaseletmein
+ GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
+ GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
+ GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
run: uv run python ./cognee/tests/test_search_db.py
run-kuzu-pgvector-postgres-search-tests:
@@ -158,19 +148,6 @@ jobs:
runs-on: ubuntu-22.04
if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j/pgvector/postgres') }}
services:
- neo4j:
- image: neo4j:5.11
- env:
- NEO4J_AUTH: neo4j/pleaseletmein
- NEO4J_PLUGINS: '["apoc","graph-data-science"]'
- ports:
- - 7474:7474
- - 7687:7687
- options: >-
- --health-cmd="cypher-shell -u neo4j -p pleaseletmein 'RETURN 1'"
- --health-interval=10s
- --health-timeout=5s
- --health-retries=5
postgres:
image: pgvector/pgvector:pg17
env:
@@ -196,6 +173,10 @@ jobs:
python-version: ${{ inputs.python-version }}
extra-dependencies: "postgres"
+ - name: Setup Neo4j with GDS
+ uses: ./.github/actions/setup_neo4j
+ id: neo4j
+
- name: Dependencies already installed
run: echo "Dependencies already installed in setup"
@@ -213,9 +194,9 @@ jobs:
GRAPH_DATABASE_PROVIDER: 'neo4j'
VECTOR_DB_PROVIDER: 'pgvector'
DB_PROVIDER: 'postgres'
- GRAPH_DATABASE_URL: bolt://localhost:7687
- GRAPH_DATABASE_USERNAME: neo4j
- GRAPH_DATABASE_PASSWORD: pleaseletmein
+ GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
+ GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
+ GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
DB_NAME: cognee_db
DB_HOST: 127.0.0.1
DB_PORT: 5432
diff --git a/.github/workflows/temporal_graph_tests.yml b/.github/workflows/temporal_graph_tests.yml
index 4156dffff..66f7d60ee 100644
--- a/.github/workflows/temporal_graph_tests.yml
+++ b/.github/workflows/temporal_graph_tests.yml
@@ -51,20 +51,6 @@ jobs:
name: Temporal Graph test Neo4j (lancedb + sqlite)
runs-on: ubuntu-22.04
if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j/lance/sqlite') }}
- services:
- neo4j:
- image: neo4j:5.11
- env:
- NEO4J_AUTH: neo4j/pleaseletmein
- NEO4J_PLUGINS: '["apoc","graph-data-science"]'
- ports:
- - 7474:7474
- - 7687:7687
- options: >-
- --health-cmd="cypher-shell -u neo4j -p pleaseletmein 'RETURN 1'"
- --health-interval=10s
- --health-timeout=5s
- --health-retries=5
steps:
- name: Check out
@@ -77,6 +63,10 @@ jobs:
with:
python-version: ${{ inputs.python-version }}
+ - name: Setup Neo4j with GDS
+ uses: ./.github/actions/setup_neo4j
+ id: neo4j
+
- name: Dependencies already installed
run: echo "Dependencies already installed in setup"
@@ -94,9 +84,9 @@ jobs:
GRAPH_DATABASE_PROVIDER: 'neo4j'
VECTOR_DB_PROVIDER: 'lancedb'
DB_PROVIDER: 'sqlite'
- GRAPH_DATABASE_URL: bolt://localhost:7687
- GRAPH_DATABASE_USERNAME: neo4j
- GRAPH_DATABASE_PASSWORD: pleaseletmein
+ GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
+ GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
+ GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
run: uv run python ./cognee/tests/test_temporal_graph.py
run_temporal_graph_kuzu_postgres_pgvector:
diff --git a/.github/workflows/test_different_operating_systems.yml b/.github/workflows/test_different_operating_systems.yml
index f99549c8d..6eb5744f3 100644
--- a/.github/workflows/test_different_operating_systems.yml
+++ b/.github/workflows/test_different_operating_systems.yml
@@ -43,7 +43,7 @@ jobs:
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
- os: [ubuntu-22.04, macos-13, macos-15, windows-latest]
+ os: [ubuntu-22.04, macos-15, windows-latest]
fail-fast: false
steps:
- name: Check out
@@ -79,7 +79,7 @@ jobs:
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
- os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
+ os: [ ubuntu-22.04, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
@@ -115,7 +115,7 @@ jobs:
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
- os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
+ os: [ ubuntu-22.04, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
@@ -151,7 +151,7 @@ jobs:
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
- os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
+ os: [ ubuntu-22.04, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
@@ -180,7 +180,7 @@ jobs:
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
- os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
+ os: [ ubuntu-22.04, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
@@ -210,7 +210,7 @@ jobs:
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
- os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
+ os: [ ubuntu-22.04, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
diff --git a/.github/workflows/test_llms.yml b/.github/workflows/test_llms.yml
index 5a0f947c9..6b0221309 100644
--- a/.github/workflows/test_llms.yml
+++ b/.github/workflows/test_llms.yml
@@ -27,7 +27,7 @@ jobs:
env:
LLM_PROVIDER: "gemini"
LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
- LLM_MODEL: "gemini/gemini-1.5-flash"
+ LLM_MODEL: "gemini/gemini-2.0-flash"
EMBEDDING_PROVIDER: "gemini"
EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
EMBEDDING_MODEL: "gemini/text-embedding-004"
@@ -83,4 +83,4 @@ jobs:
EMBEDDING_MODEL: "openai/text-embedding-3-large"
EMBEDDING_DIMENSIONS: "3072"
EMBEDDING_MAX_TOKENS: "8191"
- run: uv run python ./examples/python/simple_example.py
\ No newline at end of file
+ run: uv run python ./examples/python/simple_example.py
diff --git a/.github/workflows/test_s3_file_storage.yml b/.github/workflows/test_s3_file_storage.yml
index a477d8933..11c808a2d 100644
--- a/.github/workflows/test_s3_file_storage.yml
+++ b/.github/workflows/test_s3_file_storage.yml
@@ -6,8 +6,12 @@ on:
permissions:
contents: read
+env:
+ RUNTIME__LOG_LEVEL: ERROR
+ ENV: 'dev'
+
jobs:
- test-gemini:
+ test-s3-storage:
name: Run S3 File Storage Test
runs-on: ubuntu-22.04
steps:
diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml
index ff18f2962..2f1bdebf0 100644
--- a/.github/workflows/test_suites.yml
+++ b/.github/workflows/test_suites.yml
@@ -27,6 +27,12 @@ jobs:
uses: ./.github/workflows/e2e_tests.yml
secrets: inherit
+ distributed-tests:
+ name: Distributed Cognee Test
+ needs: [ basic-tests, e2e-tests, graph-db-tests ]
+ uses: ./.github/workflows/distributed_test.yml
+ secrets: inherit
+
cli-tests:
name: CLI Tests
uses: ./.github/workflows/cli_tests.yml
@@ -104,7 +110,7 @@ jobs:
db-examples-tests:
name: DB Examples Tests
- needs: [vector-db-tests, graph-db-tests, relational-db-migration-tests]
+ needs: [vector-db-tests, graph-db-tests, relational-db-migration-tests, distributed-tests]
uses: ./.github/workflows/db_examples_tests.yml
secrets: inherit
diff --git a/.github/workflows/vector_db_tests.yml b/.github/workflows/vector_db_tests.yml
index a82194db3..06b58c962 100644
--- a/.github/workflows/vector_db_tests.yml
+++ b/.github/workflows/vector_db_tests.yml
@@ -101,3 +101,30 @@ jobs:
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_pgvector.py
+
+ run-lancedb-tests:
+ name: LanceDB Tests
+ runs-on: ubuntu-22.04
+ steps:
+ - name: Check out
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Cognee Setup
+ uses: ./.github/actions/cognee_setup
+ with:
+ python-version: ${{ inputs.python-version }}
+
+ - name: Run LanceDB Tests
+ env:
+ ENV: 'dev'
+ LLM_MODEL: ${{ secrets.LLM_MODEL }}
+ LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+ LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+ LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+ EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+ EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+ EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+ EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+ run: uv run python ./cognee/tests/test_lancedb.py
\ No newline at end of file
diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml
index df5e048f2..874ef6ea4 100644
--- a/.github/workflows/weighted_edges_tests.yml
+++ b/.github/workflows/weighted_edges_tests.yml
@@ -86,12 +86,19 @@ jobs:
with:
python-version: '3.11'
+ - name: Setup Neo4j with GDS
+ uses: ./.github/actions/setup_neo4j
+ id: neo4j
+
- name: Dependencies already installed
run: echo "Dependencies already installed in setup"
- name: Run Weighted Edges Tests
env:
GRAPH_DATABASE_PROVIDER: ${{ matrix.graph_db_provider }}
+ GRAPH_DATABASE_URL: ${{ matrix.graph_db_provider == 'neo4j' && steps.neo4j.outputs.neo4j-url || '' }}
+ GRAPH_DATABASE_USERNAME: ${{ matrix.graph_db_provider == 'neo4j' && steps.neo4j.outputs.neo4j-username || '' }}
+ GRAPH_DATABASE_PASSWORD: ${{ matrix.graph_db_provider == 'neo4j' && steps.neo4j.outputs.neo4j-password || '' }}
run: |
uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short
diff --git a/Dockerfile b/Dockerfile
index be29f359a..9b9a34d41 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,6 +22,7 @@ RUN apt-get update && apt-get install -y \
libpq-dev \
git \
curl \
+ cmake \
clang \
build-essential \
&& rm -rf /var/lib/apt/lists/*
@@ -31,7 +32,7 @@ COPY README.md pyproject.toml uv.lock entrypoint.sh ./
# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
- uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
+ uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
# Copy Alembic configuration
COPY alembic.ini /app/alembic.ini
@@ -42,7 +43,7 @@ COPY alembic/ /app/alembic
COPY ./cognee /app/cognee
COPY ./distributed /app/distributed
RUN --mount=type=cache,target=/root/.cache/uv \
-uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
+uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
FROM python:3.12-slim-bookworm
diff --git a/README.md b/README.md
index 30f829c93..a1eebae73 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
- cognee - Memory for AI Agents in 5 lines of code
+ cognee - Memory for AI Agents in 6 lines of code
Demo @@ -43,12 +43,10 @@ -**🚀 We launched Cogwit beta (Fully-hosted AI Memory): Sign up [here](https://platform.cognee.ai/)! 🚀** + Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Extract, Cognify, Load) pipelines. -More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github.com/topoteretes/cognee/tree/main/evals) -
🌐 Available Languages : @@ -70,53 +68,50 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github -## Features - -- Interconnect and retrieve your past conversations, documents, images and audio transcriptions -- Replaces RAG systems and reduces developer effort, and cost. -- Load data to graph and vector databases using only Pydantic -- Manipulate your data while ingesting from 30+ data sources ## Get Started Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo +## About cognee +cognee works locally and stores your data on your device. +Our hosted solution is just our deployment of OSS cognee on Modal, with the goal of making development and productionization easier. -## Contributing -Your contributions are at the core of making this a true open source project. Any contributions you make are **greatly appreciated**. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for more information. +Self-hosted package: + +- Interconnects any kind of documents: past conversations, files, images, and audio transcriptions +- Replaces RAG systems with a memory layer based on graphs and vectors +- Reduces developer effort and cost, while increasing quality and precision +- Provides Pythonic data pipelines that manage data ingestion from 30+ data sources +- Is highly customizable with custom tasks, pipelines, and a set of built-in search endpoints + +Hosted platform: +- Includes a managed UI and a [hosted solution](https://www.cognee.ai) +## Self-Hosted (Open Source) -## 📦 Installation +### 📦 Installation You can install Cognee using either **pip**, **poetry**, **uv** or any other python package manager. 
-Cognee supports Python 3.10 to 3.13 +Cognee supports Python 3.10 to 3.12 -### With pip +#### With uv ```bash -pip install cognee +uv pip install cognee ``` -## Local Cognee installation +Detailed instructions can be found in our [docs](https://docs.cognee.ai/getting-started/installation#environment-configuration) -You can install the local Cognee repo using **uv**, **pip** and **poetry**. -For local pip installation please make sure your pip version is above version 21.3. +### 💻 Basic Usage -### with UV with all optional dependencies - -```bash -uv sync --all-extras -``` - -## 💻 Basic Usage - -### Setup +#### Setup ``` import os @@ -125,10 +120,14 @@ os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY" ``` You can also set the variables by creating .env file, using our template. -To use different LLM providers, for more info check out our documentation +To use different LLM providers, for more info check out our documentation -### Simple example +#### Simple example + + + +##### Python This script will run the default pipeline: @@ -139,13 +138,16 @@ import asyncio async def main(): # Add text to cognee - await cognee.add("Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval.") + await cognee.add("Cognee turns documents into AI memory.") # Generate the knowledge graph await cognee.cognify() + # Add memory algorithms to the graph + await cognee.memify() + # Query the knowledge graph - results = await cognee.search("Tell me about NLP") + results = await cognee.search("What does cognee do?") # Display the results for result in results: @@ -158,33 +160,38 @@ if __name__ == '__main__': ``` Example output: ``` - Natural Language Processing (NLP) is a cross-disciplinary and interdisciplinary field that involves computer science and information retrieval. It focuses on the interaction between computers and human language, enabling machines to understand and process natural language. 
+ Cognee turns documents into AI memory. ``` +##### Via CLI -## Our paper is out! Read here +Let's get the basics covered + +``` +cognee-cli add "Cognee turns documents into AI memory." + +cognee-cli cognify + +cognee-cli search "What does cognee do?" +cognee-cli delete --all + +``` +or run +``` +cognee-cli -ui +``` -
-
+Get up and running in minutes with automatic updates, analytics, and enterprise security.
-### Running the UI
+1. Sign up on [cogwit](https://www.cognee.ai)
+2. Add your API key to local UI and sync your data to Cogwit
-Try cognee UI by setting LLM_API_KEY and running ``` cognee-cli -ui ``` command on your terminal.
-
-## Understand our architecture
-
-
-CODE_OF_CONDUCT for more information.
-## 💫 Contributors
+## Citation
-
- | Data |
| Data |