Merge branch 'dev' into feature/cog-2734-cognee-feedbacks-interactions-poc-to-prod

This commit is contained in:
hajdul88 2025-08-18 13:17:13 +02:00 committed by GitHub
commit d53ebb2164
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
124 changed files with 1337 additions and 2625 deletions

View file

@ -6,7 +6,7 @@
# Default graph database : Kuzu # Default graph database : Kuzu
# #
# These default databases are all file-based, so no extra setup is needed # These default databases are all file-based, so no extra setup is needed
# for local use. # for local use. By default, the data is stored in your .venv directory.
############################################################################### ###############################################################################
################################################################################ ################################################################################
@ -16,7 +16,7 @@
STRUCTURED_OUTPUT_FRAMEWORK="instructor" STRUCTURED_OUTPUT_FRAMEWORK="instructor"
LLM_API_KEY="your_api_key" LLM_API_KEY="your_api_key"
LLM_MODEL="openai/gpt-4o-mini" LLM_MODEL="openai/gpt-5-mini"
LLM_PROVIDER="openai" LLM_PROVIDER="openai"
LLM_ENDPOINT="" LLM_ENDPOINT=""
LLM_API_VERSION="" LLM_API_VERSION=""
@ -33,11 +33,20 @@ EMBEDDING_MAX_TOKENS=8191
# If using BAML structured output these env variables will be used # If using BAML structured output these env variables will be used
BAML_LLM_PROVIDER=openai BAML_LLM_PROVIDER=openai
BAML_LLM_MODEL="gpt-4o-mini" BAML_LLM_MODEL="gpt-5-mini"
BAML_LLM_ENDPOINT="" BAML_LLM_ENDPOINT=""
BAML_LLM_API_KEY="your_api_key" BAML_LLM_API_KEY="your_api_key"
BAML_LLM_API_VERSION="" BAML_LLM_API_VERSION=""
################################################################################
# 📂 ROOT DIRECTORY FOR DATABASES
################################################################################
# Set up the Cognee system directory. Cognee will store system files and databases here.
# Useful for setting the root directory inside Docker and for keeping the databases out of .venv
# DATA_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_data/'
# SYSTEM_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_system/'
################################################################################ ################################################################################
# 🗄️ Relational database settings # 🗄️ Relational database settings
################################################################################ ################################################################################
@ -85,12 +94,7 @@ VECTOR_DB_PROVIDER="lancedb"
VECTOR_DB_URL= VECTOR_DB_URL=
VECTOR_DB_KEY= VECTOR_DB_KEY=
################################################################################
# 📂 ROOT DIRECTORY IF USING COGNEE LIB INSIDE A DOCKER
################################################################################
# Set up the Cognee system directory. Cognee will store system files and databases here.
# DATA_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_data/'
# SYSTEM_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_system/'
################################################################################ ################################################################################
# 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS # 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS
@ -157,8 +161,8 @@ LITELLM_LOG="ERROR"
# Uncomment + fill values to switch. # Uncomment + fill values to switch.
########## Azure OpenAI ####################################################### ########## Azure OpenAI #######################################################
#LLM_MODEL="azure/gpt-4o-mini" #LLM_MODEL="azure/gpt-5-mini"
#LLM_ENDPOINT="https://DNS.azure.com/openai/deployments/gpt-4o-mini" #LLM_ENDPOINT="https://DNS.azure.com/openai/deployments/gpt-5-mini"
#LLM_API_KEY="<<TALK TO YOUR AZURE GUY" #LLM_API_KEY="<<TALK TO YOUR AZURE GUY"
#LLM_API_VERSION="2024-12-01-preview" #LLM_API_VERSION="2024-12-01-preview"

View file

@ -1,11 +1,15 @@
name: cognee-setup name: cognee-setup
description: "Sets up Python, installs Poetry, loads venv from cache, and installs dependencies for Cognee." description: "Sets up Python, installs uv, and installs dependencies for Cognee."
inputs: inputs:
python-version: python-version:
description: "Which Python version to use" description: "Which Python version to use"
required: false required: false
default: "3.11.x" default: "3.11.x"
extra-dependencies:
description: "Additional extra dependencies to install (space-separated)"
required: false
default: ""
runs: runs:
using: "composite" using: "composite"
@ -16,18 +20,25 @@ runs:
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Install Poetry - name: Install uv
shell: bash uses: astral-sh/setup-uv@v4
run: | with:
python -m pip install --upgrade pip enable-cache: true
pip install poetry
- name: Rebuild Poetry lock file - name: Rebuild uv lockfile
shell: bash shell: bash
run: | run: |
rm poetry.lock rm uv.lock
poetry lock uv lock
- name: Install dependencies - name: Install dependencies
shell: bash shell: bash
run: poetry install --no-interaction -E api -E docs -E evals -E gemini -E codegraph -E ollama -E dev -E neo4j run: |
EXTRA_ARGS=""
if [ -n "${{ inputs.extra-dependencies }}" ]; then
IFS=' ' read -r -a deps <<< "${{ inputs.extra-dependencies }}"
for extra in "${deps[@]}"; do
EXTRA_ARGS="$EXTRA_ARGS --extra $extra"
done
fi
uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS

67
.github/actions/setup_neo4j/action.yml vendored Normal file
View file

@ -0,0 +1,67 @@
name: 'Setup Neo4j with Graph Data Science'
description: 'Sets up a Neo4j instance with APOC and Graph Data Science plugins for testing'
inputs:
neo4j-version:
description: 'Neo4j version to use'
required: false
default: '5.21'
neo4j-password:
description: 'Password for Neo4j'
required: false
default: 'cognee_test_password'
outputs:
neo4j-url:
description: 'Neo4j connection URL'
value: 'bolt://localhost:7687'
neo4j-username:
description: 'Neo4j username'
value: 'neo4j'
neo4j-password:
description: 'Neo4j password'
value: ${{ inputs.neo4j-password }}
runs:
using: 'composite'
steps:
- name: Start Neo4j with GDS
shell: bash
run: |
docker run -d \
--name neo4j-test \
-p 7474:7474 -p 7687:7687 \
-e NEO4J_AUTH="neo4j/${{ inputs.neo4j-password }}" \
-e NEO4J_PLUGINS='["apoc", "graph-data-science"]' \
-e NEO4J_dbms_security_procedures_unrestricted="apoc.*,gds.*" \
-e NEO4J_apoc_export_file_enabled=true \
-e NEO4J_apoc_import_file_enabled=true \
neo4j:${{ inputs.neo4j-version }}
- name: Wait for Neo4j to be ready
shell: bash
run: |
echo "Waiting for Neo4j to start..."
timeout=60
counter=0
while [ $counter -lt $timeout ]; do
if docker exec neo4j-test cypher-shell -u neo4j -p "${{ inputs.neo4j-password }}" "RETURN 1" > /dev/null 2>&1; then
echo "Neo4j is ready!"
break
fi
echo "Waiting... ($counter/$timeout)"
sleep 2
counter=$((counter + 2))
done
if [ $counter -ge $timeout ]; then
echo "Neo4j failed to start within $timeout seconds"
docker logs neo4j-test
exit 1
fi
- name: Verify GDS is available
shell: bash
run: |
echo "Verifying Graph Data Science library is available..."
docker exec neo4j-test cypher-shell -u neo4j -p "${{ inputs.neo4j-password }}" \
"CALL gds.version() YIELD gdsVersion RETURN gdsVersion"
echo "GDS verification complete!"

View file

@ -98,7 +98,7 @@ jobs:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Run Unit Tests - name: Run Unit Tests
run: poetry run pytest cognee/tests/unit/ run: uv run pytest cognee/tests/unit/
integration-tests: integration-tests:
name: Run Integration Tests name: Run Integration Tests
@ -115,7 +115,7 @@ jobs:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Run Integration Tests - name: Run Integration Tests
run: poetry run pytest cognee/tests/integration/ run: uv run pytest cognee/tests/integration/
simple-examples: simple-examples:
name: Run Simple Examples name: Run Simple Examples
@ -144,7 +144,7 @@ jobs:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Run Simple Examples - name: Run Simple Examples
run: poetry run python ./examples/python/simple_example.py run: uv run python ./examples/python/simple_example.py
simple-examples-baml: simple-examples-baml:
name: Run Simple Examples BAML name: Run Simple Examples BAML
@ -180,7 +180,7 @@ jobs:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Run Simple Examples - name: Run Simple Examples
run: poetry run python ./examples/python/simple_example.py run: uv run python ./examples/python/simple_example.py
graph-tests: graph-tests:
name: Run Basic Graph Tests name: Run Basic Graph Tests
@ -209,4 +209,4 @@ jobs:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Run Graph Tests - name: Run Graph Tests
run: poetry run python ./examples/python/code_graph_example.py --repo_path ./cognee/tasks/graph run: uv run python ./examples/python/code_graph_example.py --repo_path ./cognee/tasks/graph

View file

@ -54,10 +54,6 @@ jobs:
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Install Neo4j extra
run: |
poetry install -E neo4j
- name: Run Neo4j Example - name: Run Neo4j Example
env: env:
ENV: dev ENV: dev
@ -74,7 +70,7 @@ jobs:
GRAPH_DATABASE_USERNAME: "neo4j" GRAPH_DATABASE_USERNAME: "neo4j"
GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }} GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
run: | run: |
poetry run python examples/database_examples/neo4j_example.py uv run python examples/database_examples/neo4j_example.py
run-db-example-kuzu: run-db-example-kuzu:
name: "Kuzu DB Example Test" name: "Kuzu DB Example Test"
@ -90,9 +86,8 @@ jobs:
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Install Kuzu extra - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install
- name: Run Kuzu Example - name: Run Kuzu Example
env: env:
@ -107,7 +102,7 @@ jobs:
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
GRAPH_DATABASE_PROVIDER: "kuzu" GRAPH_DATABASE_PROVIDER: "kuzu"
run: | run: |
poetry run python examples/database_examples/kuzu_example.py uv run python examples/database_examples/kuzu_example.py
run-db-example-pgvector: run-db-example-pgvector:
name: "PostgreSQL PGVector DB Example Test" name: "PostgreSQL PGVector DB Example Test"
@ -138,10 +133,7 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
extra-dependencies: "postgres"
- name: Install PGVector extra
run: |
poetry install -E postgres
- name: Run PGVector Example - name: Run PGVector Example
env: env:
@ -155,4 +147,4 @@ jobs:
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: | run: |
poetry run python examples/database_examples/pgvector_example.py uv run python examples/database_examples/pgvector_example.py

View file

@ -57,7 +57,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_cognee_server_start.py run: uv run python ./cognee/tests/test_cognee_server_start.py
run-telemetry-test: run-telemetry-test:
name: Run Telemetry Test name: Run Telemetry Test
@ -81,7 +81,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_telemetry.py run: uv run python ./cognee/tests/test_telemetry.py
run-telemetry-pipeline-test: run-telemetry-pipeline-test:
name: Run Telemetry Pipeline Test name: Run Telemetry Pipeline Test
@ -110,7 +110,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_library.py run: uv run python ./cognee/tests/test_library.py
run-deduplication-test: run-deduplication-test:
name: Deduplication Test name: Deduplication Test
@ -140,10 +140,7 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: '3.11.x' python-version: '3.11.x'
extra-dependencies: "postgres"
- name: Install specific db dependency
run: |
poetry install -E postgres
- name: Run Deduplication Example - name: Run Deduplication Example
env: env:
@ -154,7 +151,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_deduplication.py run: uv run python ./cognee/tests/test_deduplication.py
run-deletion-test: run-deletion-test:
name: Deletion Test name: Deletion Test
@ -179,7 +176,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_deletion.py run: uv run python ./cognee/tests/test_deletion.py
run-s3-bucket-test: run-s3-bucket-test:
name: S3 Bucket Test name: S3 Bucket Test
@ -192,10 +189,10 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: '3.11.x' python-version: '3.11.x'
extra-dependencies: "aws"
- name: Install specific S3 dependency - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install -E aws
- name: Run S3 Bucket Test - name: Run S3 Bucket Test
env: env:
@ -210,7 +207,7 @@ jobs:
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: poetry run python ./cognee/tests/test_s3.py run: uv run python ./cognee/tests/test_s3.py
test-parallel-databases: test-parallel-databases:
name: Test using different async databases in parallel in Cognee name: Test using different async databases in parallel in Cognee
@ -224,9 +221,8 @@ jobs:
with: with:
python-version: '3.11.x' python-version: '3.11.x'
- name: Install specific graph db dependency - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install
- name: Run parallel databases test - name: Run parallel databases test
env: env:
@ -239,7 +235,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_parallel_databases.py run: uv run python ./cognee/tests/test_parallel_databases.py
test-permissions: test-permissions:
name: Test permissions with different situations in Cognee name: Test permissions with different situations in Cognee
@ -253,9 +249,8 @@ jobs:
with: with:
python-version: '3.11.x' python-version: '3.11.x'
- name: Install specific graph db dependency - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install
- name: Run parallel databases test - name: Run parallel databases test
env: env:
@ -268,7 +263,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_permissions.py run: uv run python ./cognee/tests/test_permissions.py
test-graph-edges: test-graph-edges:
name: Test graph edge ingestion name: Test graph edge ingestion
@ -282,9 +277,8 @@ jobs:
with: with:
python-version: '3.11.x' python-version: '3.11.x'
- name: Install specific graph db dependency - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install
- name: Run graph edges test - name: Run graph edges test
env: env:
@ -297,4 +291,4 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_edge_ingestion.py run: uv run python ./cognee/tests/test_edge_ingestion.py

View file

@ -20,7 +20,7 @@ jobs:
env: env:
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: poetry run python ./examples/python/multimedia_example.py run: uv run python ./examples/python/multimedia_example.py
test-eval-example: test-eval-example:
name: Run Eval Example name: Run Eval Example
@ -33,10 +33,7 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: '3.11.x' python-version: '3.11.x'
extra-dependencies: "deepeval"
- name: Install specific eval dependency
run: |
poetry install -E deepeval
- name: Run Evaluation Framework Example - name: Run Evaluation Framework Example
env: env:
@ -49,7 +46,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/eval_framework/run_eval.py run: uv run python ./cognee/eval_framework/run_eval.py
test-descriptive-metrics: test-descriptive-metrics:
name: Run Descriptive Metrics Example name: Run Descriptive Metrics Example
@ -63,6 +60,10 @@ jobs:
with: with:
python-version: '3.11.x' python-version: '3.11.x'
- name: Setup Neo4j with GDS
uses: ./.github/actions/setup_neo4j
id: neo4j
- name: Run Descriptive Graph Metrics Example - name: Run Descriptive Graph Metrics Example
env: env:
LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_MODEL: ${{ secrets.LLM_MODEL }}
@ -74,7 +75,11 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py GRAPH_DATABASE_PROVIDER: "neo4j"
GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
run: uv run python ./cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py
test-dynamic-steps-metrics: test-dynamic-steps-metrics:
@ -100,4 +105,4 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./examples/python/dynamic_steps_example.py run: uv run python ./examples/python/dynamic_steps_example.py

View file

@ -28,9 +28,8 @@ jobs:
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Install specific db dependency - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install
- name: Run Kuzu Tests - name: Run Kuzu Tests
env: env:
@ -43,7 +42,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_kuzu.py run: uv run python ./cognee/tests/test_kuzu.py
- name: Run Weighted Edges Tests with Kuzu - name: Run Weighted Edges Tests with Kuzu
env: env:
@ -57,7 +56,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v run: uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v
run-neo4j-tests: run-neo4j-tests:
name: Neo4j Tests name: Neo4j Tests
@ -72,10 +71,6 @@ jobs:
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Install specific db dependency
run: |
poetry install -E neo4j
- name: Run default Neo4j - name: Run default Neo4j
env: env:
ENV: 'dev' ENV: 'dev'
@ -91,7 +86,7 @@ jobs:
GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }} GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }} GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
GRAPH_DATABASE_USERNAME: "neo4j" GRAPH_DATABASE_USERNAME: "neo4j"
run: poetry run python ./cognee/tests/test_neo4j.py run: uv run python ./cognee/tests/test_neo4j.py
- name: Run Weighted Edges Tests with Neo4j - name: Run Weighted Edges Tests with Neo4j
env: env:
@ -108,4 +103,4 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v run: uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v

View file

@ -55,7 +55,7 @@ jobs:
- name: Run unit tests - name: Run unit tests
shell: bash shell: bash
run: poetry run pytest cognee/tests/unit/ run: uv run pytest cognee/tests/unit/
env: env:
PYTHONUTF8: 1 PYTHONUTF8: 1
LLM_PROVIDER: openai LLM_PROVIDER: openai
@ -73,7 +73,7 @@ jobs:
- name: Run integration tests - name: Run integration tests
if: ${{ !contains(matrix.os, 'windows') }} if: ${{ !contains(matrix.os, 'windows') }}
shell: bash shell: bash
run: poetry run pytest cognee/tests/integration/ run: uv run pytest cognee/tests/integration/
env: env:
PYTHONUTF8: 1 PYTHONUTF8: 1
LLM_PROVIDER: openai LLM_PROVIDER: openai
@ -103,11 +103,11 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_library.py run: uv run python ./cognee/tests/test_library.py
- name: Build with Poetry - name: Build with uv
shell: bash shell: bash
run: poetry build run: uv build
- name: Install Package - name: Install Package
if: ${{ !contains(matrix.os, 'windows') }} if: ${{ !contains(matrix.os, 'windows') }}

View file

@ -58,10 +58,10 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: '3.11.x' python-version: '3.11.x'
extra-dependencies: "postgres"
- name: Install specific db dependency - name: Install specific db dependency
run: | run: echo "Dependencies already installed in setup"
poetry install -E postgres
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql) - name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
env: env:
@ -90,7 +90,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_relational_db_migration.py run: uv run python ./cognee/tests/test_relational_db_migration.py
run-relational-db-migration-test-kuzu: run-relational-db-migration-test-kuzu:
name: Kuzu Relational DB Migration Test name: Kuzu Relational DB Migration Test
@ -120,10 +120,10 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: '3.11.x' python-version: '3.11.x'
extra-dependencies: "postgres"
- name: Install specific db dependency - name: Install specific db dependency
run: | run: echo "Dependencies already installed in setup"
poetry install -E postgres
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql) - name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
env: env:
@ -154,7 +154,7 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_relational_db_migration.py run: uv run python ./cognee/tests/test_relational_db_migration.py
run-relational-db-migration-test-neo4j: run-relational-db-migration-test-neo4j:
name: Neo4j Relational DB Migration Test name: Neo4j Relational DB Migration Test
@ -184,10 +184,10 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: '3.11.x' python-version: '3.11.x'
extra-dependencies: "postgres"
- name: Install specific db dependency - name: Install specific db dependency
run: | run: echo "Dependencies already installed in setup"
poetry install -E postgres -E neo4j
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql) - name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
env: env:
@ -221,4 +221,4 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_relational_db_migration.py run: uv run python ./cognee/tests/test_relational_db_migration.py

View file

@ -46,10 +46,7 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
extra-dependencies: "notebook"
- name: Install specific db dependency
run: |
poetry install -E notebook
- name: Execute Jupyter Notebook - name: Execute Jupyter Notebook
env: env:
@ -64,7 +61,7 @@ jobs:
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: | run: |
poetry run jupyter nbconvert \ uv run jupyter nbconvert \
--to notebook \ --to notebook \
--execute ${{ inputs.notebook-location }} \ --execute ${{ inputs.notebook-location }} \
--output executed_notebook.ipynb \ --output executed_notebook.ipynb \

View file

@ -28,9 +28,8 @@ jobs:
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Install specific db dependency - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install
- name: Run Kuzu search Tests - name: Run Kuzu search Tests
env: env:
@ -46,7 +45,7 @@ jobs:
GRAPH_DATABASE_PROVIDER: 'kuzu' GRAPH_DATABASE_PROVIDER: 'kuzu'
VECTOR_DB_PROVIDER: 'lancedb' VECTOR_DB_PROVIDER: 'lancedb'
DB_PROVIDER: 'sqlite' DB_PROVIDER: 'sqlite'
run: poetry run python ./cognee/tests/test_search_db.py run: uv run python ./cognee/tests/test_search_db.py
run-neo4j-lance-sqlite-search-tests: run-neo4j-lance-sqlite-search-tests:
name: Search test for Neo4j/LanceDB/Sqlite name: Search test for Neo4j/LanceDB/Sqlite
@ -78,9 +77,8 @@ jobs:
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
- name: Install specific db dependency - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install -E neo4j
- name: Run Neo4j search Tests - name: Run Neo4j search Tests
env: env:
@ -99,7 +97,7 @@ jobs:
GRAPH_DATABASE_URL: bolt://localhost:7687 GRAPH_DATABASE_URL: bolt://localhost:7687
GRAPH_DATABASE_USERNAME: neo4j GRAPH_DATABASE_USERNAME: neo4j
GRAPH_DATABASE_PASSWORD: pleaseletmein GRAPH_DATABASE_PASSWORD: pleaseletmein
run: poetry run python ./cognee/tests/test_search_db.py run: uv run python ./cognee/tests/test_search_db.py
run-kuzu-pgvector-postgres-search-tests: run-kuzu-pgvector-postgres-search-tests:
name: Search test for Kuzu/PGVector/Postgres name: Search test for Kuzu/PGVector/Postgres
@ -129,9 +127,10 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
extra-dependencies: "postgres"
- name: Install dependencies - name: Dependencies already installed
run: poetry install -E postgres run: echo "Dependencies already installed in setup"
- name: Run Kuzu/PGVector/Postgres Tests - name: Run Kuzu/PGVector/Postgres Tests
env: env:
@ -152,7 +151,7 @@ jobs:
DB_PORT: 5432 DB_PORT: 5432
DB_USERNAME: cognee DB_USERNAME: cognee
DB_PASSWORD: cognee DB_PASSWORD: cognee
run: poetry run python ./cognee/tests/test_search_db.py run: uv run python ./cognee/tests/test_search_db.py
run-neo4j-pgvector-postgres-search-tests: run-neo4j-pgvector-postgres-search-tests:
name: Search test for Neo4j/PGVector/Postgres name: Search test for Neo4j/PGVector/Postgres
@ -195,10 +194,10 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
extra-dependencies: "postgres"
- name: Install dependencies - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install -E neo4j -E postgres
- name: Run Neo4j + PGVector + Postgres search Tests - name: Run Neo4j + PGVector + Postgres search Tests
env: env:
@ -222,4 +221,4 @@ jobs:
DB_PORT: 5432 DB_PORT: 5432
DB_USERNAME: cognee DB_USERNAME: cognee
DB_PASSWORD: cognee DB_PASSWORD: cognee
run: poetry run python ./cognee/tests/test_search_db.py run: uv run python ./cognee/tests/test_search_db.py

View file

@ -26,4 +26,4 @@ jobs:
EMBEDDING_MODEL: "gemini/text-embedding-004" EMBEDDING_MODEL: "gemini/text-embedding-004"
EMBEDDING_DIMENSIONS: "768" EMBEDDING_DIMENSIONS: "768"
EMBEDDING_MAX_TOKENS: "8076" EMBEDDING_MAX_TOKENS: "8076"
run: poetry run python ./examples/python/simple_example.py run: uv run python ./examples/python/simple_example.py

View file

@ -26,7 +26,7 @@ jobs:
- name: Install torch dependency - name: Install torch dependency
run: | run: |
poetry add torch uv add torch
# - name: Install ollama # - name: Install ollama
# run: curl -fsSL https://ollama.com/install.sh | sh # run: curl -fsSL https://ollama.com/install.sh | sh
@ -101,4 +101,4 @@ jobs:
EMBEDDING_ENDPOINT: "http://localhost:11434/api/embeddings" EMBEDDING_ENDPOINT: "http://localhost:11434/api/embeddings"
EMBEDDING_DIMENSIONS: "4096" EMBEDDING_DIMENSIONS: "4096"
HUGGINGFACE_TOKENIZER: "Salesforce/SFR-Embedding-Mistral" HUGGINGFACE_TOKENIZER: "Salesforce/SFR-Embedding-Mistral"
run: poetry run python ./examples/python/simple_example.py run: uv run python ./examples/python/simple_example.py

View file

@ -36,4 +36,4 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_s3_file_storage.py run: uv run python ./cognee/tests/test_s3_file_storage.py

View file

@ -56,7 +56,7 @@ jobs:
# EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} # EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
# EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} # EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
# EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} # EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
# run: poetry run python ./cognee/tests/test_chromadb.py # run: uv run python ./cognee/tests/test_chromadb.py
run-postgres-tests: run-postgres-tests:
@ -87,10 +87,7 @@ jobs:
uses: ./.github/actions/cognee_setup uses: ./.github/actions/cognee_setup
with: with:
python-version: ${{ inputs.python-version }} python-version: ${{ inputs.python-version }}
extra-dependencies: "postgres"
- name: Install specific db dependency
run: |
poetry install -E postgres
- name: Run PGVector Tests - name: Run PGVector Tests
env: env:
@ -103,4 +100,4 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_pgvector.py run: uv run python ./cognee/tests/test_pgvector.py

View file

@ -31,7 +31,7 @@ jobs:
python-version: ['3.11', '3.12'] python-version: ['3.11', '3.12']
env: env:
LLM_PROVIDER: openai LLM_PROVIDER: openai
LLM_MODEL: gpt-4o-mini LLM_MODEL: gpt-5-mini
LLM_API_KEY: ${{ secrets.LLM_API_KEY }} LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
steps: steps:
@ -47,11 +47,11 @@ jobs:
- name: Run Weighted Edges Unit Tests - name: Run Weighted Edges Unit Tests
run: | run: |
poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short
- name: Run Standard Graph Tests (Regression) - name: Run Standard Graph Tests (Regression)
run: | run: |
poetry run pytest cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py -v --tb=short uv run pytest cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py -v --tb=short
test-with-different-databases: test-with-different-databases:
name: Test Weighted Edges with Different Graph Databases name: Test Weighted Edges with Different Graph Databases
@ -61,14 +61,12 @@ jobs:
database: ['kuzu', 'neo4j'] database: ['kuzu', 'neo4j']
include: include:
- database: kuzu - database: kuzu
install_extra: ""
graph_db_provider: "kuzu" graph_db_provider: "kuzu"
- database: neo4j - database: neo4j
install_extra: "-E neo4j"
graph_db_provider: "neo4j" graph_db_provider: "neo4j"
env: env:
LLM_PROVIDER: openai LLM_PROVIDER: openai
LLM_MODEL: gpt-4o-mini LLM_MODEL: gpt-5-mini
LLM_ENDPOINT: https://api.openai.com/v1/ LLM_ENDPOINT: https://api.openai.com/v1/
LLM_API_KEY: ${{ secrets.LLM_API_KEY }} LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: "2024-02-01" LLM_API_VERSION: "2024-02-01"
@ -88,22 +86,21 @@ jobs:
with: with:
python-version: '3.11' python-version: '3.11'
- name: Install Database Dependencies - name: Dependencies already installed
run: | run: echo "Dependencies already installed in setup"
poetry install ${{ matrix.install_extra }}
- name: Run Weighted Edges Tests - name: Run Weighted Edges Tests
env: env:
GRAPH_DATABASE_PROVIDER: ${{ matrix.graph_db_provider }} GRAPH_DATABASE_PROVIDER: ${{ matrix.graph_db_provider }}
run: | run: |
poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short
test-examples: test-examples:
name: Test Weighted Edges Examples name: Test Weighted Edges Examples
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
env: env:
LLM_PROVIDER: openai LLM_PROVIDER: openai
LLM_MODEL: gpt-4o-mini LLM_MODEL: gpt-5-mini
LLM_ENDPOINT: https://api.openai.com/v1/ LLM_ENDPOINT: https://api.openai.com/v1/
LLM_API_KEY: ${{ secrets.LLM_API_KEY }} LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: "2024-02-01" LLM_API_VERSION: "2024-02-01"
@ -125,7 +122,7 @@ jobs:
- name: Test Weighted Edges Example - name: Test Weighted Edges Example
run: | run: |
poetry run python examples/python/weighted_edges_example.py uv run python examples/python/weighted_edges_example.py
- name: Verify Visualization File Created - name: Verify Visualization File Created
run: | run: |

View file

@ -92,7 +92,8 @@ Your contributions are at the core of making this a true open source project. An
## 📦 Installation ## 📦 Installation
You can install Cognee using either **pip**, **poetry**, **uv** or any other python package manager. You can install Cognee using either **pip**, **poetry**, **uv** or any other python package manager.
Cognee supports Python 3.8 to 3.12
Cognee supports Python 3.10 to 3.13
### With pip ### With pip
@ -102,7 +103,7 @@ pip install cognee
## Local Cognee installation ## Local Cognee installation
You can install the local Cognee repo using **pip**, **poetry** and **uv**. You can install the local Cognee repo using **uv**, **pip** and **poetry**.
For local pip installation please make sure your pip version is above version 21.3. For local pip installation please make sure your pip version is above version 21.3.
### with UV with all optional dependencies ### with UV with all optional dependencies

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.2 MiB

After

Width:  |  Height:  |  Size: 63 KiB

View file

@ -34,8 +34,8 @@
// }; // };
// const defaultModel = { // const defaultModel = {
// label: 'gpt-4o-mini', // label: 'gpt-5-mini',
// value: 'gpt-4o-mini', // value: 'gpt-5-mini',
// }; // };
// export default function Settings({ onDone = () => {}, submitButtonText = 'Save' }) { // export default function Settings({ onDone = () => {}, submitButtonText = 'Save' }) {

View file

@ -8,7 +8,7 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
# For local cognee repo usage remove comment below and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes. # For local cognee repo usage remove comment below and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
# "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee", # "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee",
"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.1", "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.2",
"fastmcp>=2.10.0,<3.0.0", "fastmcp>=2.10.0,<3.0.0",
"mcp>=1.12.0,<2.0.0", "mcp>=1.12.0,<2.0.0",
"uv>=0.6.3,<1.0.0", "uv>=0.6.3,<1.0.0",

View file

@ -221,14 +221,6 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
- The actual cognify process may take significant time depending on text length - The actual cognify process may take significant time depending on text length
- Use the cognify_status tool to check the progress of the operation - Use the cognify_status tool to check the progress of the operation
Raises
------
InvalidValueError
If LLM_API_KEY is not set
ValueError
If chunks exceed max token limits (reduce chunk_size)
DatabaseNotCreatedError
If databases are not properly initialized
""" """
async def cognify_task( async def cognify_task(
@ -512,14 +504,6 @@ async def search(search_query: str, search_type: str) -> list:
- Different search types produce different output formats - Different search types produce different output formats
- The function handles the conversion between Cognee's internal result format and MCP's output format - The function handles the conversion between Cognee's internal result format and MCP's output format
Raises
------
InvalidValueError
If LLM_API_KEY is not set (for LLM-based search types)
ValueError
If query_text is empty or search parameters are invalid
NoDataError
If no relevant data found for the search query
""" """
async def search_task(search_query: str, search_type: str) -> str: async def search_task(search_query: str, search_type: str) -> str:

View file

@ -1,6 +1,6 @@
# In case you choose to use OpenAI provider, just adjust the model and api_key. # In case you choose to use OpenAI provider, just adjust the model and api_key.
LLM_API_KEY="" LLM_API_KEY=""
LLM_MODEL="openai/gpt-4o-mini" LLM_MODEL="openai/gpt-5-mini"
LLM_PROVIDER="openai" LLM_PROVIDER="openai"
# Not needed if you use OpenAI # Not needed if you use OpenAI
LLM_ENDPOINT="" LLM_ENDPOINT=""

View file

@ -128,17 +128,12 @@ async def add(
Optional: Optional:
- LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama" - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
- LLM_MODEL: Model name (default: "gpt-4o-mini") - LLM_MODEL: Model name (default: "gpt-5-mini")
- DEFAULT_USER_EMAIL: Custom default user email - DEFAULT_USER_EMAIL: Custom default user email
- DEFAULT_USER_PASSWORD: Custom default user password - DEFAULT_USER_PASSWORD: Custom default user password
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector" - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
- GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx" - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
Raises:
FileNotFoundError: If specified file paths don't exist
PermissionError: If user lacks access to files or dataset
UnsupportedFileTypeError: If file format cannot be processed
InvalidValueError: If LLM_API_KEY is not set or invalid
""" """
tasks = [ tasks = [
Task(resolve_data_directories, include_subdirectories=True), Task(resolve_data_directories, include_subdirectories=True),

View file

@ -91,7 +91,7 @@ async def cognify(
- LangchainChunker: Recursive character splitting with overlap - LangchainChunker: Recursive character splitting with overlap
Determines how documents are segmented for processing. Determines how documents are segmented for processing.
chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None. chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
Formula: min(embedding_max_tokens, llm_max_tokens // 2) Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
Default limits: ~512-8192 tokens depending on models. Default limits: ~512-8192 tokens depending on models.
Smaller chunks = more granular but potentially fragmented knowledge. Smaller chunks = more granular but potentially fragmented knowledge.
ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types. ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
@ -177,14 +177,6 @@ async def cognify(
- LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False) - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
Raises:
DatasetNotFoundError: If specified datasets don't exist
PermissionError: If user lacks processing rights
InvalidValueError: If LLM_API_KEY is not set
OntologyParsingError: If ontology file is malformed
ValueError: If chunks exceed max token limits (reduce chunk_size)
DatabaseNotCreatedError: If databases are not properly initialized
""" """
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path) tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)

View file

@ -2,7 +2,6 @@
import os import os
from cognee.base_config import get_base_config from cognee.base_config import get_base_config
from cognee.exceptions import InvalidValueError, InvalidAttributeError
from cognee.modules.cognify.config import get_cognify_config from cognee.modules.cognify.config import get_cognify_config
from cognee.infrastructure.data.chunking.config import get_chunk_config from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.infrastructure.databases.vector import get_vectordb_config from cognee.infrastructure.databases.vector import get_vectordb_config
@ -11,6 +10,7 @@ from cognee.infrastructure.llm.config import (
get_llm_config, get_llm_config,
) )
from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
from cognee.api.v1.exceptions.exceptions import InvalidConfigAttributeError
class config: class config:
@ -92,9 +92,7 @@ class config:
if hasattr(llm_config, key): if hasattr(llm_config, key):
object.__setattr__(llm_config, key, value) object.__setattr__(llm_config, key, value)
else: else:
raise InvalidAttributeError( raise InvalidConfigAttributeError(attribute=key)
message=f"'{key}' is not a valid attribute of the config."
)
@staticmethod @staticmethod
def set_chunk_strategy(chunk_strategy: object): def set_chunk_strategy(chunk_strategy: object):
@ -131,9 +129,7 @@ class config:
if hasattr(relational_db_config, key): if hasattr(relational_db_config, key):
object.__setattr__(relational_db_config, key, value) object.__setattr__(relational_db_config, key, value)
else: else:
raise InvalidAttributeError( raise InvalidConfigAttributeError(attribute=key)
message=f"'{key}' is not a valid attribute of the config."
)
@staticmethod @staticmethod
def set_migration_db_config(config_dict: dict): def set_migration_db_config(config_dict: dict):
@ -145,9 +141,7 @@ class config:
if hasattr(migration_db_config, key): if hasattr(migration_db_config, key):
object.__setattr__(migration_db_config, key, value) object.__setattr__(migration_db_config, key, value)
else: else:
raise InvalidAttributeError( raise InvalidConfigAttributeError(attribute=key)
message=f"'{key}' is not a valid attribute of the config."
)
@staticmethod @staticmethod
def set_graph_db_config(config_dict: dict) -> None: def set_graph_db_config(config_dict: dict) -> None:
@ -171,9 +165,7 @@ class config:
if hasattr(vector_db_config, key): if hasattr(vector_db_config, key):
object.__setattr__(vector_db_config, key, value) object.__setattr__(vector_db_config, key, value)
else: else:
raise InvalidAttributeError( raise InvalidConfigAttributeError(attribute=key)
message=f"'{key}' is not a valid attribute of the config."
)
@staticmethod @staticmethod
def set_vector_db_key(db_key: str): def set_vector_db_key(db_key: str):

View file

@ -13,7 +13,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.methods import get_authorized_existing_datasets from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.methods import create_dataset, get_datasets_by_name from cognee.modules.data.methods import create_dataset, get_datasets_by_name
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from cognee.api.v1.delete.exceptions import DataNotFoundError, DatasetNotFoundError from cognee.api.v1.exceptions import DataNotFoundError, DatasetNotFoundError
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.users.permissions.methods import ( from cognee.modules.users.permissions.methods import (

View file

@ -16,7 +16,7 @@ from cognee.modules.users.methods import get_default_user
from cognee.modules.data.methods import get_authorized_existing_datasets from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.context_global_variables import set_database_global_context_variables from cognee.context_global_variables import set_database_global_context_variables
from cognee.api.v1.delete.exceptions import ( from cognee.api.v1.exceptions import (
DocumentNotFoundError, DocumentNotFoundError,
DatasetNotFoundError, DatasetNotFoundError,
DocumentSubgraphNotFoundError, DocumentSubgraphNotFoundError,

View file

@ -0,0 +1,13 @@
"""
Custom exceptions for the Cognee API.
This module defines a set of exceptions for handling various data errors
"""
from .exceptions import (
InvalidConfigAttributeError,
DocumentNotFoundError,
DatasetNotFoundError,
DataNotFoundError,
DocumentSubgraphNotFoundError,
)

View file

@ -1,10 +1,19 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import CogneeConfigurationError, CogneeValidationError
from fastapi import status from fastapi import status
class DocumentNotFoundError(CogneeApiError): class InvalidConfigAttributeError(CogneeConfigurationError):
"""Raised when a document cannot be found in the database.""" def __init__(
self,
attribute: str,
name: str = "InvalidConfigAttributeError",
status_code: int = status.HTTP_400_BAD_REQUEST,
):
message = f"'{attribute}' is not a valid attribute of the configuration."
super().__init__(message, name, status_code)
class DocumentNotFoundError(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "Document not found in database.", message: str = "Document not found in database.",
@ -14,9 +23,7 @@ class DocumentNotFoundError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class DatasetNotFoundError(CogneeApiError): class DatasetNotFoundError(CogneeValidationError):
"""Raised when a dataset cannot be found."""
def __init__( def __init__(
self, self,
message: str = "Dataset not found.", message: str = "Dataset not found.",
@ -26,9 +33,7 @@ class DatasetNotFoundError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class DataNotFoundError(CogneeApiError): class DataNotFoundError(CogneeValidationError):
"""Raised when a dataset cannot be found."""
def __init__( def __init__(
self, self,
message: str = "Data not found.", message: str = "Data not found.",
@ -38,9 +43,7 @@ class DataNotFoundError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class DocumentSubgraphNotFoundError(CogneeApiError): class DocumentSubgraphNotFoundError(CogneeValidationError):
"""Raised when a document's subgraph cannot be found in the graph database."""
def __init__( def __init__(
self, self,
message: str = "Document subgraph not found in graph database.", message: str = "Document subgraph not found in graph database.",

View file

@ -70,7 +70,7 @@ class ResponseRequest(InDTO):
tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto" tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto"
user: Optional[str] = None user: Optional[str] = None
temperature: Optional[float] = 1.0 temperature: Optional[float] = 1.0
max_tokens: Optional[int] = None max_completion_tokens: Optional[int] = None
class ToolCallOutput(BaseModel): class ToolCallOutput(BaseModel):

View file

@ -161,13 +161,6 @@ async def search(
- VECTOR_DB_PROVIDER: Must match what was used during cognify - VECTOR_DB_PROVIDER: Must match what was used during cognify
- GRAPH_DATABASE_PROVIDER: Must match what was used during cognify - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify
Raises:
DatasetNotFoundError: If specified datasets don't exist or aren't accessible
PermissionDeniedError: If user lacks read access to requested datasets
NoDataError: If no relevant data found for the search query
InvalidValueError: If LLM_API_KEY is not set (for LLM-based search types)
ValueError: If query_text is empty or search parameters are invalid
CollectionNotFoundError: If vector collection not found (data not processed)
""" """
# We use lists from now on for datasets # We use lists from now on for datasets
if isinstance(datasets, UUID) or isinstance(datasets, str): if isinstance(datasets, UUID) or isinstance(datasets, str):

View file

@ -25,7 +25,7 @@ class EvalConfig(BaseSettings):
"EM", "EM",
"f1", "f1",
] # Use only 'correctness' for DirectLLM ] # Use only 'correctness' for DirectLLM
deepeval_model: str = "gpt-4o-mini" deepeval_model: str = "gpt-5-mini"
# Metrics params # Metrics params
calculate_metrics: bool = True calculate_metrics: bool = True

View file

@ -2,13 +2,13 @@
Custom exceptions for the Cognee API. Custom exceptions for the Cognee API.
This module defines a set of exceptions for handling various application errors, This module defines a set of exceptions for handling various application errors,
such as service failures, resource conflicts, and invalid operations. such as system, validation, configuration, and transient errors.
""" """
from .exceptions import ( from .exceptions import (
CogneeApiError, CogneeApiError,
ServiceError, CogneeSystemError,
InvalidValueError, CogneeValidationError,
InvalidAttributeError, CogneeConfigurationError,
CriticalError, CogneeTransientError,
) )

View file

@ -35,37 +35,57 @@ class CogneeApiError(Exception):
return f"{self.name}: {self.message} (Status code: {self.status_code})" return f"{self.name}: {self.message} (Status code: {self.status_code})"
class ServiceError(CogneeApiError): class CogneeSystemError(CogneeApiError):
"""Failures in external services or APIs, like a database or a third-party service""" """System error"""
def __init__( def __init__(
self, self,
message: str = "Service is unavailable.", message: str = "A system error occurred.",
name: str = "ServiceError", name: str = "CogneeSystemError",
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
log=True,
log_level="ERROR",
): ):
super().__init__(message, name, status_code) super().__init__(message, name, status_code, log, log_level)
class InvalidValueError(CogneeApiError): class CogneeValidationError(CogneeApiError):
"""Validation error"""
def __init__( def __init__(
self, self,
message: str = "Invalid Value.", message: str = "A validation error occurred.",
name: str = "InvalidValueError", name: str = "CogneeValidationError",
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
log=True,
log_level="ERROR",
): ):
super().__init__(message, name, status_code) super().__init__(message, name, status_code, log, log_level)
class InvalidAttributeError(CogneeApiError): class CogneeConfigurationError(CogneeApiError):
"""SystemConfigError"""
def __init__( def __init__(
self, self,
message: str = "Invalid attribute.", message: str = "A system configuration error occurred.",
name: str = "InvalidAttributeError", name: str = "CogneeConfigurationError",
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
log=True,
log_level="ERROR",
): ):
super().__init__(message, name, status_code) super().__init__(message, name, status_code, log, log_level)
class CriticalError(CogneeApiError): class CogneeTransientError(CogneeApiError):
pass """TransientError"""
def __init__(
self,
message: str = "A transient error occurred.",
name: str = "CogneeTransientError",
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
log=True,
log_level="ERROR",
):
super().__init__(message, name, status_code, log, log_level)

View file

@ -0,0 +1,7 @@
"""
Custom exceptions for the Cognee API.
This module defines a set of exceptions for handling various data errors
"""
from .exceptions import KeywordExtractionError

View file

@ -0,0 +1,22 @@
from cognee.exceptions import (
CogneeValidationError,
)
from fastapi import status
class KeywordExtractionError(CogneeValidationError):
    """
    Validation error signalling that keyword extraction received unusable input.

    The provided value is syntactically valid but semantically unacceptable
    for the given operation.

    Example:
        - Passing an empty string to a keyword extraction function.
    """

    def __init__(
        self,
        message: str = "Extract_keywords cannot extract keywords from empty text.",
        name: str = "KeywordExtractionError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ):
        # The defaults describe the empty-text case; callers may override any of them.
        super().__init__(message=message, name=name, status_code=status_code)

View file

@ -1,6 +1,6 @@
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction.text import TfidfVectorizer
from cognee.exceptions import InvalidValueError from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError
from cognee.shared.utils import extract_pos_tags from cognee.shared.utils import extract_pos_tags
@ -8,7 +8,7 @@ def extract_keywords(text: str) -> list[str]:
""" """
Extract keywords from the provided text string. Extract keywords from the provided text string.
This function raises an InvalidValueError if the input text is empty. It processes the This function raises a KeywordExtractionError if the input text is empty. It processes the
text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
relevant keywords based on their frequency. The function returns a list of up to 15 relevant keywords based on their frequency. The function returns a list of up to 15
keywords, each having more than 3 characters. keywords, each having more than 3 characters.
@ -25,7 +25,7 @@ def extract_keywords(text: str) -> list[str]:
with more than 3 characters. with more than 3 characters.
""" """
if len(text) == 0: if len(text) == 0:
raise InvalidValueError(message="extract_keywords cannot extract keywords from empty text.") raise KeywordExtractionError()
tags = extract_pos_tags(text) tags = extract_pos_tags(text)
nouns = [word for (word, tag) in tags if tag == "NN"] nouns = [word for (word, tag) in tags if tag == "NN"]

View file

@ -1,20 +0,0 @@
from cognee.exceptions import CogneeApiError
from fastapi import status
class EmbeddingException(CogneeApiError):
"""
Custom exception for handling embedding-related errors.
This exception class is designed to indicate issues specifically related to embeddings
within the application. It extends the base exception class CogneeApiError and allows
for customization of the error message, name, and status code.
"""
def __init__(
self,
message: str = "Embedding Exception.",
name: str = "EmbeddingException",
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
):
super().__init__(message, name, status_code)

View file

@ -8,4 +8,7 @@ from .exceptions import (
EntityNotFoundError, EntityNotFoundError,
EntityAlreadyExistsError, EntityAlreadyExistsError,
DatabaseNotCreatedError, DatabaseNotCreatedError,
EmbeddingException,
MissingQueryParameterError,
MutuallyExclusiveQueryParametersError,
) )

View file

@ -1,13 +1,13 @@
from fastapi import status from fastapi import status
from cognee.exceptions import CogneeApiError, CriticalError from cognee.exceptions import CogneeSystemError, CogneeValidationError, CogneeConfigurationError
class DatabaseNotCreatedError(CriticalError): class DatabaseNotCreatedError(CogneeSystemError):
""" """
Represents an error indicating that the database has not been created. This error should Represents an error indicating that the database has not been created. This error should
be raised when an attempt is made to access the database before it has been initialized. be raised when an attempt is made to access the database before it has been initialized.
Inherits from CriticalError. Overrides the constructor to include a default message and Inherits from CogneeSystemError. Overrides the constructor to include a default message and
status code. status code.
""" """
@ -20,10 +20,10 @@ class DatabaseNotCreatedError(CriticalError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class EntityNotFoundError(CogneeApiError): class EntityNotFoundError(CogneeValidationError):
""" """
Represents an error when a requested entity is not found in the database. This class Represents an error when a requested entity is not found in the database. This class
inherits from CogneeApiError. inherits from CogneeValidationError.
Public methods: Public methods:
@ -49,11 +49,11 @@ class EntityNotFoundError(CogneeApiError):
# super().__init__(message, name, status_code) :TODO: This is not an error anymore with the dynamic exception handling therefore we shouldn't log error # super().__init__(message, name, status_code) :TODO: This is not an error anymore with the dynamic exception handling therefore we shouldn't log error
class EntityAlreadyExistsError(CogneeApiError): class EntityAlreadyExistsError(CogneeValidationError):
""" """
Represents an error when an entity creation is attempted but the entity already exists. Represents an error when an entity creation is attempted but the entity already exists.
This class is derived from CogneeApiError and is used to signal a conflict in operations This class is derived from CogneeValidationError and is used to signal a conflict in operations
involving resource creation. involving resource creation.
""" """
@ -66,11 +66,11 @@ class EntityAlreadyExistsError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NodesetFilterNotSupportedError(CogneeApiError): class NodesetFilterNotSupportedError(CogneeConfigurationError):
""" """
Raise an exception when a nodeset filter is not supported by the current database. Raise an exception when a nodeset filter is not supported by the current database.
This exception inherits from `CogneeApiError` and is designed to provide information This exception inherits from `CogneeConfigurationError` and is designed to provide information
about the specific issue of unsupported nodeset filters in the context of graph about the specific issue of unsupported nodeset filters in the context of graph
databases. databases.
""" """
@ -84,3 +84,51 @@ class NodesetFilterNotSupportedError(CogneeApiError):
self.message = message self.message = message
self.name = name self.name = name
self.status_code = status_code self.status_code = status_code
class EmbeddingException(CogneeConfigurationError):
    """
    Custom exception for handling embedding-related errors.

    This exception class is designed to indicate issues specifically related to embeddings
    within the application. It extends the base exception class CogneeConfigurationError and
    allows for customization of the error message, name, and status code.

    Parameters:
    -----------

        - message (str): Human-readable description of the failure.
        - name (str): Identifier reported for this error.
        - status_code (int): HTTP status code associated with the error; defaults to
          422 Unprocessable Entity.
    """

    def __init__(
        self,
        message: str = "Embedding Exception.",
        name: str = "EmbeddingException",
        # Annotated as int to match the sibling exceptions in this module.
        status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
    ):
        super().__init__(message, name, status_code)
class MissingQueryParameterError(CogneeValidationError):
    """
    Signals that a search was requested without any query input.

    Raised when both 'query_text' and 'query_vector' are missing; at least one of them
    is required to perform the operation. The message is fixed, while the error name and
    the HTTP status code (400 Bad Request by default) may be overridden.
    """

    def __init__(
        self,
        name: str = "MissingQueryParameterError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ):
        super().__init__(
            "One of query_text or query_vector must be provided!",
            name,
            status_code,
        )
class MutuallyExclusiveQueryParametersError(CogneeValidationError):
    """
    Signals that a search received two competing query inputs.

    Raised when both a text query ('query_text') and an embedding ('query_vector') are
    supplied to the search function, which accepts only one kind of input at a time.
    The message is fixed, while the error name and the HTTP status code (400 Bad Request
    by default) may be overridden.
    """

    def __init__(
        self,
        name: str = "MutuallyExclusiveQueryParametersError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ):
        super().__init__(
            "The search function accepts either text or embedding as input, but not both.",
            name,
            status_code,
        )

View file

@ -21,10 +21,6 @@ async def get_graph_engine() -> GraphDBInterface:
if hasattr(graph_client, "initialize"): if hasattr(graph_client, "initialize"):
await graph_client.initialize() await graph_client.initialize()
# Handle loading of graph for NetworkX
if config["graph_database_provider"].lower() == "networkx" and graph_client.graph is None:
await graph_client.load_graph_from_file()
return graph_client return graph_client
@ -181,8 +177,7 @@ def create_graph_engine(
graph_id=graph_identifier, graph_id=graph_identifier,
) )
from .networkx.adapter import NetworkXAdapter raise EnvironmentError(
f"Unsupported graph database provider: {graph_database_provider}. "
graph_client = NetworkXAdapter(filename=graph_file_path) f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'falkordb', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
)
return graph_client

View file

@ -3,11 +3,16 @@
This module defines custom exceptions for Neptune Analytics operations. This module defines custom exceptions for Neptune Analytics operations.
""" """
from cognee.exceptions import CogneeApiError from cognee.exceptions import (
CogneeSystemError,
CogneeTransientError,
CogneeValidationError,
CogneeConfigurationError,
)
from fastapi import status from fastapi import status
class NeptuneAnalyticsError(CogneeApiError): class NeptuneAnalyticsError(CogneeSystemError):
"""Base exception for Neptune Analytics operations.""" """Base exception for Neptune Analytics operations."""
def __init__( def __init__(
@ -19,7 +24,7 @@ class NeptuneAnalyticsError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NeptuneAnalyticsConnectionError(NeptuneAnalyticsError): class NeptuneAnalyticsConnectionError(CogneeTransientError):
"""Exception raised when connection to Neptune Analytics fails.""" """Exception raised when connection to Neptune Analytics fails."""
def __init__( def __init__(
@ -31,7 +36,7 @@ class NeptuneAnalyticsConnectionError(NeptuneAnalyticsError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NeptuneAnalyticsQueryError(NeptuneAnalyticsError): class NeptuneAnalyticsQueryError(CogneeValidationError):
"""Exception raised when a query execution fails.""" """Exception raised when a query execution fails."""
def __init__( def __init__(
@ -43,7 +48,7 @@ class NeptuneAnalyticsQueryError(NeptuneAnalyticsError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NeptuneAnalyticsAuthenticationError(NeptuneAnalyticsError): class NeptuneAnalyticsAuthenticationError(CogneeConfigurationError):
"""Exception raised when authentication with Neptune Analytics fails.""" """Exception raised when authentication with Neptune Analytics fails."""
def __init__( def __init__(
@ -55,7 +60,7 @@ class NeptuneAnalyticsAuthenticationError(NeptuneAnalyticsError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NeptuneAnalyticsConfigurationError(NeptuneAnalyticsError): class NeptuneAnalyticsConfigurationError(CogneeConfigurationError):
"""Exception raised when Neptune Analytics configuration is invalid.""" """Exception raised when Neptune Analytics configuration is invalid."""
def __init__( def __init__(
@ -67,7 +72,7 @@ class NeptuneAnalyticsConfigurationError(NeptuneAnalyticsError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NeptuneAnalyticsTimeoutError(NeptuneAnalyticsError): class NeptuneAnalyticsTimeoutError(CogneeTransientError):
"""Exception raised when a Neptune Analytics operation times out.""" """Exception raised when a Neptune Analytics operation times out."""
def __init__( def __init__(
@ -79,7 +84,7 @@ class NeptuneAnalyticsTimeoutError(NeptuneAnalyticsError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NeptuneAnalyticsThrottlingError(NeptuneAnalyticsError): class NeptuneAnalyticsThrottlingError(CogneeTransientError):
"""Exception raised when requests are throttled by Neptune Analytics.""" """Exception raised when requests are throttled by Neptune Analytics."""
def __init__( def __init__(
@ -91,7 +96,7 @@ class NeptuneAnalyticsThrottlingError(NeptuneAnalyticsError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NeptuneAnalyticsResourceNotFoundError(NeptuneAnalyticsError): class NeptuneAnalyticsResourceNotFoundError(CogneeValidationError):
"""Exception raised when a Neptune Analytics resource is not found.""" """Exception raised when a Neptune Analytics resource is not found."""
def __init__( def __init__(
@ -103,7 +108,7 @@ class NeptuneAnalyticsResourceNotFoundError(NeptuneAnalyticsError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NeptuneAnalyticsInvalidParameterError(NeptuneAnalyticsError): class NeptuneAnalyticsInvalidParameterError(CogneeValidationError):
"""Exception raised when invalid parameters are provided to Neptune Analytics.""" """Exception raised when invalid parameters are provided to Neptune Analytics."""
def __init__( def __init__(

File diff suppressed because it is too large Load diff

View file

@ -9,7 +9,7 @@ from typing import List, Dict, Any, Optional, Tuple, Type, Union
from falkordb import FalkorDB from falkordb import FalkorDB
from cognee.exceptions import InvalidValueError from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from cognee.infrastructure.databases.graph.graph_db_interface import ( from cognee.infrastructure.databases.graph.graph_db_interface import (
GraphDBInterface, GraphDBInterface,
record_graph_changes, record_graph_changes,
@ -721,7 +721,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
Returns the search results as a result set from the graph database. Returns the search results as a result set from the graph database.
""" """
if query_text is None and query_vector is None: if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!") raise MissingQueryParameterError()
if query_text and not query_vector: if query_text and not query_vector:
query_vector = (await self.embed_data([query_text]))[0] query_vector = (await self.embed_data([query_text]))[0]

View file

@ -5,7 +5,8 @@ import json
from typing import List, Optional, Any, Dict, Type, Tuple from typing import List, Optional, Any, Dict, Type, Tuple
from uuid import UUID from uuid import UUID
from cognee.exceptions import InvalidValueError from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from cognee.infrastructure.databases.exceptions import MutuallyExclusiveQueryParametersError
from cognee.infrastructure.databases.graph.neptune_driver.adapter import NeptuneGraphDB from cognee.infrastructure.databases.graph.neptune_driver.adapter import NeptuneGraphDB
from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface
from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine import DataPoint
@ -274,11 +275,9 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
limit = self._TOPK_UPPER_BOUND limit = self._TOPK_UPPER_BOUND
if query_vector and query_text: if query_vector and query_text:
raise InvalidValueError( raise MutuallyExclusiveQueryParametersError()
message="The search function accepts either text or embedding as input, but not both."
)
elif query_text is None and query_vector is None: elif query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!") raise MissingQueryParameterError()
elif query_vector: elif query_vector:
embedding = query_vector embedding = query_vector
else: else:

View file

@ -4,13 +4,13 @@ from uuid import UUID
from typing import List, Optional from typing import List, Optional
from chromadb import AsyncHttpClient, Settings from chromadb import AsyncHttpClient, Settings
from cognee.exceptions import InvalidValueError
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from cognee.modules.storage.utils import get_own_properties from cognee.modules.storage.utils import get_own_properties
from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.engine.utils import parse_id
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from ..embeddings.EmbeddingEngine import EmbeddingEngine from ..embeddings.EmbeddingEngine import EmbeddingEngine
from ..vector_db_interface import VectorDBInterface from ..vector_db_interface import VectorDBInterface
@ -378,7 +378,7 @@ class ChromaDBAdapter(VectorDBInterface):
Returns a list of ScoredResult instances representing the search results. Returns a list of ScoredResult instances representing the search results.
""" """
if query_text is None and query_vector is None: if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!") raise MissingQueryParameterError()
if query_text and not query_vector: if query_text and not query_vector:
query_vector = (await self.embedding_engine.embed_text([query_text]))[0] query_vector = (await self.embedding_engine.embed_text([query_text]))[0]

View file

@ -41,11 +41,11 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
self, self,
model: Optional[str] = "openai/text-embedding-3-large", model: Optional[str] = "openai/text-embedding-3-large",
dimensions: Optional[int] = 3072, dimensions: Optional[int] = 3072,
max_tokens: int = 512, max_completion_tokens: int = 512,
): ):
self.model = model self.model = model
self.dimensions = dimensions self.dimensions = dimensions
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
self.tokenizer = self.get_tokenizer() self.tokenizer = self.get_tokenizer()
# self.retry_count = 0 # self.retry_count = 0
self.embedding_model = TextEmbedding(model_name=model) self.embedding_model = TextEmbedding(model_name=model)
@ -112,7 +112,9 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
""" """
logger.debug("Loading tokenizer for FastembedEmbeddingEngine...") logger.debug("Loading tokenizer for FastembedEmbeddingEngine...")
tokenizer = TikTokenTokenizer(model="gpt-4o", max_tokens=self.max_tokens) tokenizer = TikTokenTokenizer(
model="gpt-4o", max_completion_tokens=self.max_completion_tokens
)
logger.debug("Tokenizer loaded for for FastembedEmbeddingEngine") logger.debug("Tokenizer loaded for for FastembedEmbeddingEngine")
return tokenizer return tokenizer

View file

@ -6,7 +6,7 @@ import math
import litellm import litellm
import os import os
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException from cognee.infrastructure.databases.exceptions import EmbeddingException
from cognee.infrastructure.llm.tokenizer.Gemini import ( from cognee.infrastructure.llm.tokenizer.Gemini import (
GeminiTokenizer, GeminiTokenizer,
) )
@ -57,7 +57,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
api_key: str = None, api_key: str = None,
endpoint: str = None, endpoint: str = None,
api_version: str = None, api_version: str = None,
max_tokens: int = 512, max_completion_tokens: int = 512,
): ):
self.api_key = api_key self.api_key = api_key
self.endpoint = endpoint self.endpoint = endpoint
@ -65,7 +65,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
self.provider = provider self.provider = provider
self.model = model self.model = model
self.dimensions = dimensions self.dimensions = dimensions
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
self.tokenizer = self.get_tokenizer() self.tokenizer = self.get_tokenizer()
self.retry_count = 0 self.retry_count = 0
@ -179,20 +179,29 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
model = self.model.split("/")[-1] model = self.model.split("/")[-1]
if "openai" in self.provider.lower(): if "openai" in self.provider.lower():
tokenizer = TikTokenTokenizer(model=model, max_tokens=self.max_tokens) tokenizer = TikTokenTokenizer(
model=model, max_completion_tokens=self.max_completion_tokens
)
elif "gemini" in self.provider.lower(): elif "gemini" in self.provider.lower():
tokenizer = GeminiTokenizer(model=model, max_tokens=self.max_tokens) tokenizer = GeminiTokenizer(
model=model, max_completion_tokens=self.max_completion_tokens
)
elif "mistral" in self.provider.lower(): elif "mistral" in self.provider.lower():
tokenizer = MistralTokenizer(model=model, max_tokens=self.max_tokens) tokenizer = MistralTokenizer(
model=model, max_completion_tokens=self.max_completion_tokens
)
else: else:
try: try:
tokenizer = HuggingFaceTokenizer( tokenizer = HuggingFaceTokenizer(
model=self.model.replace("hosted_vllm/", ""), max_tokens=self.max_tokens model=self.model.replace("hosted_vllm/", ""),
max_completion_tokens=self.max_completion_tokens,
) )
except Exception as e: except Exception as e:
logger.warning(f"Could not get tokenizer from HuggingFace due to: {e}") logger.warning(f"Could not get tokenizer from HuggingFace due to: {e}")
logger.info("Switching to TikToken default tokenizer.") logger.info("Switching to TikToken default tokenizer.")
tokenizer = TikTokenTokenizer(model=None, max_tokens=self.max_tokens) tokenizer = TikTokenTokenizer(
model=None, max_completion_tokens=self.max_completion_tokens
)
logger.debug(f"Tokenizer loaded for model: {self.model}") logger.debug(f"Tokenizer loaded for model: {self.model}")
return tokenizer return tokenizer

View file

@ -30,7 +30,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
Instance variables: Instance variables:
- model - model
- dimensions - dimensions
- max_tokens - max_completion_tokens
- endpoint - endpoint
- mock - mock
- huggingface_tokenizer_name - huggingface_tokenizer_name
@ -39,7 +39,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
model: str model: str
dimensions: int dimensions: int
max_tokens: int max_completion_tokens: int
endpoint: str endpoint: str
mock: bool mock: bool
huggingface_tokenizer_name: str huggingface_tokenizer_name: str
@ -50,13 +50,13 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
self, self,
model: Optional[str] = "avr/sfr-embedding-mistral:latest", model: Optional[str] = "avr/sfr-embedding-mistral:latest",
dimensions: Optional[int] = 1024, dimensions: Optional[int] = 1024,
max_tokens: int = 512, max_completion_tokens: int = 512,
endpoint: Optional[str] = "http://localhost:11434/api/embeddings", endpoint: Optional[str] = "http://localhost:11434/api/embeddings",
huggingface_tokenizer: str = "Salesforce/SFR-Embedding-Mistral", huggingface_tokenizer: str = "Salesforce/SFR-Embedding-Mistral",
): ):
self.model = model self.model = model
self.dimensions = dimensions self.dimensions = dimensions
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
self.endpoint = endpoint self.endpoint = endpoint
self.huggingface_tokenizer_name = huggingface_tokenizer self.huggingface_tokenizer_name = huggingface_tokenizer
self.tokenizer = self.get_tokenizer() self.tokenizer = self.get_tokenizer()
@ -132,7 +132,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
""" """
logger.debug("Loading HuggingfaceTokenizer for OllamaEmbeddingEngine...") logger.debug("Loading HuggingfaceTokenizer for OllamaEmbeddingEngine...")
tokenizer = HuggingFaceTokenizer( tokenizer = HuggingFaceTokenizer(
model=self.huggingface_tokenizer_name, max_tokens=self.max_tokens model=self.huggingface_tokenizer_name, max_completion_tokens=self.max_completion_tokens
) )
logger.debug("Tokenizer loaded for OllamaEmbeddingEngine") logger.debug("Tokenizer loaded for OllamaEmbeddingEngine")
return tokenizer return tokenizer

View file

@ -18,7 +18,7 @@ class EmbeddingConfig(BaseSettings):
embedding_endpoint: Optional[str] = None embedding_endpoint: Optional[str] = None
embedding_api_key: Optional[str] = None embedding_api_key: Optional[str] = None
embedding_api_version: Optional[str] = None embedding_api_version: Optional[str] = None
embedding_max_tokens: Optional[int] = 8191 embedding_max_completion_tokens: Optional[int] = 8191
huggingface_tokenizer: Optional[str] = None huggingface_tokenizer: Optional[str] = None
model_config = SettingsConfigDict(env_file=".env", extra="allow") model_config = SettingsConfigDict(env_file=".env", extra="allow")
@ -38,7 +38,7 @@ class EmbeddingConfig(BaseSettings):
"embedding_endpoint": self.embedding_endpoint, "embedding_endpoint": self.embedding_endpoint,
"embedding_api_key": self.embedding_api_key, "embedding_api_key": self.embedding_api_key,
"embedding_api_version": self.embedding_api_version, "embedding_api_version": self.embedding_api_version,
"embedding_max_tokens": self.embedding_max_tokens, "embedding_max_completion_tokens": self.embedding_max_completion_tokens,
"huggingface_tokenizer": self.huggingface_tokenizer, "huggingface_tokenizer": self.huggingface_tokenizer,
} }

View file

@ -27,7 +27,7 @@ def get_embedding_engine() -> EmbeddingEngine:
config.embedding_provider, config.embedding_provider,
config.embedding_model, config.embedding_model,
config.embedding_dimensions, config.embedding_dimensions,
config.embedding_max_tokens, config.embedding_max_completion_tokens,
config.embedding_endpoint, config.embedding_endpoint,
config.embedding_api_key, config.embedding_api_key,
config.embedding_api_version, config.embedding_api_version,
@ -41,7 +41,7 @@ def create_embedding_engine(
embedding_provider, embedding_provider,
embedding_model, embedding_model,
embedding_dimensions, embedding_dimensions,
embedding_max_tokens, embedding_max_completion_tokens,
embedding_endpoint, embedding_endpoint,
embedding_api_key, embedding_api_key,
embedding_api_version, embedding_api_version,
@ -58,7 +58,7 @@ def create_embedding_engine(
'ollama', or another supported provider. 'ollama', or another supported provider.
- embedding_model: The model to be used for the embedding engine. - embedding_model: The model to be used for the embedding engine.
- embedding_dimensions: The number of dimensions for the embeddings. - embedding_dimensions: The number of dimensions for the embeddings.
- embedding_max_tokens: The maximum number of tokens for the embeddings. - embedding_max_completion_tokens: The maximum number of tokens for the embeddings.
- embedding_endpoint: The endpoint for the embedding service, relevant for certain - embedding_endpoint: The endpoint for the embedding service, relevant for certain
providers. providers.
- embedding_api_key: API key to authenticate with the embedding service, if - embedding_api_key: API key to authenticate with the embedding service, if
@ -81,7 +81,7 @@ def create_embedding_engine(
return FastembedEmbeddingEngine( return FastembedEmbeddingEngine(
model=embedding_model, model=embedding_model,
dimensions=embedding_dimensions, dimensions=embedding_dimensions,
max_tokens=embedding_max_tokens, max_completion_tokens=embedding_max_completion_tokens,
) )
if embedding_provider == "ollama": if embedding_provider == "ollama":
@ -90,7 +90,7 @@ def create_embedding_engine(
return OllamaEmbeddingEngine( return OllamaEmbeddingEngine(
model=embedding_model, model=embedding_model,
dimensions=embedding_dimensions, dimensions=embedding_dimensions,
max_tokens=embedding_max_tokens, max_completion_tokens=embedding_max_completion_tokens,
endpoint=embedding_endpoint, endpoint=embedding_endpoint,
huggingface_tokenizer=huggingface_tokenizer, huggingface_tokenizer=huggingface_tokenizer,
) )
@ -104,5 +104,5 @@ def create_embedding_engine(
api_version=embedding_api_version, api_version=embedding_api_version,
model=embedding_model, model=embedding_model,
dimensions=embedding_dimensions, dimensions=embedding_dimensions,
max_tokens=embedding_max_tokens, max_completion_tokens=embedding_max_completion_tokens,
) )

View file

@ -1,12 +1,12 @@
from fastapi import status from fastapi import status
from cognee.exceptions import CriticalError from cognee.exceptions import CogneeValidationError
class CollectionNotFoundError(CriticalError): class CollectionNotFoundError(CogneeValidationError):
""" """
Represents an error that occurs when a requested collection cannot be found. Represents an error that occurs when a requested collection cannot be found.
This class extends the CriticalError to handle specific cases where a requested This class extends the CogneeValidationError to handle specific cases where a requested
collection is unavailable. It can be initialized with a custom message and allows for collection is unavailable. It can be initialized with a custom message and allows for
logging options including log level and whether to log the error. logging options including log level and whether to log the error.
""" """

View file

@ -5,7 +5,7 @@ from pydantic import BaseModel
from lancedb.pydantic import LanceModel, Vector from lancedb.pydantic import LanceModel, Vector
from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints
from cognee.exceptions import InvalidValueError from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.engine.utils import parse_id
from cognee.infrastructure.files.storage import get_file_storage from cognee.infrastructure.files.storage import get_file_storage
@ -228,7 +228,7 @@ class LanceDBAdapter(VectorDBInterface):
normalized: bool = True, normalized: bool = True,
): ):
if query_text is None and query_vector is None: if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!") raise MissingQueryParameterError()
if query_text and not query_vector: if query_text and not query_vector:
query_vector = (await self.embedding_engine.embed_text([query_text]))[0] query_vector = (await self.embedding_engine.embed_text([query_text]))[0]

View file

@ -9,7 +9,7 @@ from sqlalchemy.exc import ProgrammingError
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError
from cognee.exceptions import InvalidValueError
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.engine.utils import parse_id
@ -17,6 +17,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
from distributed.utils import override_distributed from distributed.utils import override_distributed
from distributed.tasks.queued_add_data_points import queued_add_data_points from distributed.tasks.queued_add_data_points import queued_add_data_points
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from ...relational.ModelBase import Base from ...relational.ModelBase import Base
from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
@ -275,7 +276,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
return metadata.tables[collection_name] return metadata.tables[collection_name]
else: else:
raise CollectionNotFoundError( raise CollectionNotFoundError(
f"Collection '{collection_name}' not found!", log_level="DEBUG" f"Collection '{collection_name}' not found!",
) )
async def retrieve(self, collection_name: str, data_point_ids: List[str]): async def retrieve(self, collection_name: str, data_point_ids: List[str]):
@ -302,7 +303,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
with_vector: bool = False, with_vector: bool = False,
) -> List[ScoredResult]: ) -> List[ScoredResult]:
if query_text is None and query_vector is None: if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!") raise MissingQueryParameterError()
if query_text and not query_vector: if query_text and not query_vector:
query_vector = (await self.embedding_engine.embed_text([query_text]))[0] query_vector = (await self.embedding_engine.embed_text([query_text]))[0]

View file

@ -18,7 +18,7 @@ class LLMConfig(BaseSettings):
- llm_api_version - llm_api_version
- llm_temperature - llm_temperature
- llm_streaming - llm_streaming
- llm_max_tokens - llm_max_completion_tokens
- transcription_model - transcription_model
- graph_prompt_path - graph_prompt_path
- llm_rate_limit_enabled - llm_rate_limit_enabled
@ -35,16 +35,16 @@ class LLMConfig(BaseSettings):
structured_output_framework: str = "instructor" structured_output_framework: str = "instructor"
llm_provider: str = "openai" llm_provider: str = "openai"
llm_model: str = "gpt-4o-mini" llm_model: str = "gpt-5-mini"
llm_endpoint: str = "" llm_endpoint: str = ""
llm_api_key: Optional[str] = None llm_api_key: Optional[str] = None
llm_api_version: Optional[str] = None llm_api_version: Optional[str] = None
llm_temperature: float = 0.0 llm_temperature: float = 0.0
llm_streaming: bool = False llm_streaming: bool = False
llm_max_tokens: int = 16384 llm_max_completion_tokens: int = 16384
baml_llm_provider: str = "openai" baml_llm_provider: str = "openai"
baml_llm_model: str = "gpt-4o-mini" baml_llm_model: str = "gpt-5-mini"
baml_llm_endpoint: str = "" baml_llm_endpoint: str = ""
baml_llm_api_key: Optional[str] = None baml_llm_api_key: Optional[str] = None
baml_llm_temperature: float = 0.0 baml_llm_temperature: float = 0.0
@ -171,7 +171,7 @@ class LLMConfig(BaseSettings):
"api_version": self.llm_api_version, "api_version": self.llm_api_version,
"temperature": self.llm_temperature, "temperature": self.llm_temperature,
"streaming": self.llm_streaming, "streaming": self.llm_streaming,
"max_tokens": self.llm_max_tokens, "max_completion_tokens": self.llm_max_completion_tokens,
"transcription_model": self.transcription_model, "transcription_model": self.transcription_model,
"graph_prompt_path": self.graph_prompt_path, "graph_prompt_path": self.graph_prompt_path,
"rate_limit_enabled": self.llm_rate_limit_enabled, "rate_limit_enabled": self.llm_rate_limit_enabled,

View file

@ -1,5 +1,33 @@
from cognee.exceptions.exceptions import CriticalError from cognee.exceptions.exceptions import CogneeValidationError
class ContentPolicyFilterError(CriticalError): class ContentPolicyFilterError(CogneeValidationError):
pass pass
class LLMAPIKeyNotSetError(CogneeValidationError):
    """
    Error for a configuration that carries no LLM API key.

    The message may be customized by the caller; the error name is always
    "LLMAPIKeyNotSetError".
    """

    def __init__(self, message: str = "LLM API key is not set."):
        super().__init__(name="LLMAPIKeyNotSetError", message=message)
class UnsupportedLLMProviderError(CogneeValidationError):
    """
    Error for a configuration that names an LLM provider which is not supported.

    The offending provider is embedded in the error message; the error name is always
    "UnsupportedLLMProviderError".
    """

    def __init__(self, provider: str):
        super().__init__(
            name="UnsupportedLLMProviderError",
            message=f"Unsupported LLM provider: {provider}",
        )
class MissingSystemPromptPathError(CogneeValidationError):
    """
    Error for a request that supplies no system prompt path.

    The message is fixed; only the error name may be overridden.
    """

    def __init__(
        self,
        name: str = "MissingSystemPromptPathError",
    ):
        super().__init__("No system prompt path provided.", name)

View file

@ -2,7 +2,7 @@ from typing import Type
from pydantic import BaseModel from pydantic import BaseModel
import instructor import instructor
from cognee.exceptions import InvalidValueError from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface, LLMInterface,
) )
@ -23,7 +23,7 @@ class AnthropicAdapter(LLMInterface):
name = "Anthropic" name = "Anthropic"
model: str model: str
def __init__(self, max_tokens: int, model: str = None): def __init__(self, max_completion_tokens: int, model: str = None):
import anthropic import anthropic
self.aclient = instructor.patch( self.aclient = instructor.patch(
@ -31,7 +31,7 @@ class AnthropicAdapter(LLMInterface):
) )
self.model = model self.model = model
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
@sleep_and_retry_async() @sleep_and_retry_async()
@rate_limit_async @rate_limit_async
@ -57,7 +57,7 @@ class AnthropicAdapter(LLMInterface):
return await self.aclient( return await self.aclient(
model=self.model, model=self.model,
max_tokens=4096, max_completion_tokens=4096,
max_retries=5, max_retries=5,
messages=[ messages=[
{ {
@ -89,7 +89,7 @@ class AnthropicAdapter(LLMInterface):
if not text_input: if not text_input:
text_input = "No user input provided." text_input = "No user input provided."
if not system_prompt: if not system_prompt:
raise InvalidValueError(message="No system prompt path provided.") raise MissingSystemPromptPathError()
system_prompt = LLMGateway.read_query_prompt(system_prompt) system_prompt = LLMGateway.read_query_prompt(system_prompt)

View file

@ -5,7 +5,7 @@ from litellm import acompletion, JSONSchemaValidationError
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from cognee.modules.observability.get_observe import get_observe from cognee.modules.observability.get_observe import get_observe
from cognee.exceptions import InvalidValueError from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface, LLMInterface,
) )
@ -34,7 +34,7 @@ class GeminiAdapter(LLMInterface):
self, self,
api_key: str, api_key: str,
model: str, model: str,
max_tokens: int, max_completion_tokens: int,
endpoint: Optional[str] = None, endpoint: Optional[str] = None,
api_version: Optional[str] = None, api_version: Optional[str] = None,
streaming: bool = False, streaming: bool = False,
@ -44,7 +44,7 @@ class GeminiAdapter(LLMInterface):
self.endpoint = endpoint self.endpoint = endpoint
self.api_version = api_version self.api_version = api_version
self.streaming = streaming self.streaming = streaming
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
@observe(as_type="generation") @observe(as_type="generation")
@sleep_and_retry_async() @sleep_and_retry_async()
@ -90,7 +90,7 @@ class GeminiAdapter(LLMInterface):
model=f"{self.model}", model=f"{self.model}",
messages=messages, messages=messages,
api_key=self.api_key, api_key=self.api_key,
max_tokens=self.max_tokens, max_completion_tokens=self.max_completion_tokens,
temperature=0.1, temperature=0.1,
response_format=response_schema, response_format=response_schema,
timeout=100, timeout=100,
@ -118,7 +118,7 @@ class GeminiAdapter(LLMInterface):
""" """
Format and display the prompt for a user query. Format and display the prompt for a user query.
Raises an InvalidValueError if no system prompt is provided. Raises a MissingSystemPromptPathError if no system prompt is provided.
Parameters: Parameters:
----------- -----------
@ -135,7 +135,7 @@ class GeminiAdapter(LLMInterface):
if not text_input: if not text_input:
text_input = "No user input provided." text_input = "No user input provided."
if not system_prompt: if not system_prompt:
raise InvalidValueError(message="No system prompt path provided.") raise MissingSystemPromptPathError()
system_prompt = LLMGateway.read_query_prompt(system_prompt) system_prompt = LLMGateway.read_query_prompt(system_prompt)
formatted_prompt = ( formatted_prompt = (

View file

@ -41,7 +41,7 @@ class GenericAPIAdapter(LLMInterface):
api_key: str, api_key: str,
model: str, model: str,
name: str, name: str,
max_tokens: int, max_completion_tokens: int,
fallback_model: str = None, fallback_model: str = None,
fallback_api_key: str = None, fallback_api_key: str = None,
fallback_endpoint: str = None, fallback_endpoint: str = None,
@ -50,7 +50,7 @@ class GenericAPIAdapter(LLMInterface):
self.model = model self.model = model
self.api_key = api_key self.api_key = api_key
self.endpoint = endpoint self.endpoint = endpoint
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
self.fallback_model = fallback_model self.fallback_model = fallback_model
self.fallback_api_key = fallback_api_key self.fallback_api_key = fallback_api_key

View file

@ -2,11 +2,14 @@
from enum import Enum from enum import Enum
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm import get_llm_config from cognee.infrastructure.llm import get_llm_config
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import ( from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import (
OllamaAPIAdapter, OllamaAPIAdapter,
) )
from cognee.infrastructure.llm.exceptions import (
LLMAPIKeyNotSetError,
UnsupportedLLMProviderError,
)
# Define an Enum for LLM Providers # Define an Enum for LLM Providers
@ -35,7 +38,7 @@ def get_llm_client():
This function retrieves the configuration for the LLM provider and model, and This function retrieves the configuration for the LLM provider and model, and
initializes the appropriate LLM client adapter accordingly. It raises an initializes the appropriate LLM client adapter accordingly. It raises an
InvalidValueError if the LLM API key is not set for certain providers or if the provider LLMAPIKeyNotSetError if the LLM API key is not set for certain providers, or UnsupportedLLMProviderError if the provider
is unsupported. is unsupported.
Returns: Returns:
@ -51,15 +54,19 @@ def get_llm_client():
# Check if max_token value is defined in liteLLM for given model # Check if max_token value is defined in liteLLM for given model
# if not use value from cognee configuration # if not use value from cognee configuration
from cognee.infrastructure.llm.utils import ( from cognee.infrastructure.llm.utils import (
get_model_max_tokens, get_model_max_completion_tokens,
) # imported here to avoid circular imports ) # imported here to avoid circular imports
model_max_tokens = get_model_max_tokens(llm_config.llm_model) model_max_completion_tokens = get_model_max_completion_tokens(llm_config.llm_model)
max_tokens = model_max_tokens if model_max_tokens else llm_config.llm_max_tokens max_completion_tokens = (
model_max_completion_tokens
if model_max_completion_tokens
else llm_config.llm_max_completion_tokens
)
if provider == LLMProvider.OPENAI: if provider == LLMProvider.OPENAI:
if llm_config.llm_api_key is None: if llm_config.llm_api_key is None:
raise InvalidValueError(message="LLM API key is not set.") raise LLMAPIKeyNotSetError()
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import ( from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import (
OpenAIAdapter, OpenAIAdapter,
@ -71,7 +78,7 @@ def get_llm_client():
api_version=llm_config.llm_api_version, api_version=llm_config.llm_api_version,
model=llm_config.llm_model, model=llm_config.llm_model,
transcription_model=llm_config.transcription_model, transcription_model=llm_config.transcription_model,
max_tokens=max_tokens, max_completion_tokens=max_completion_tokens,
streaming=llm_config.llm_streaming, streaming=llm_config.llm_streaming,
fallback_api_key=llm_config.fallback_api_key, fallback_api_key=llm_config.fallback_api_key,
fallback_endpoint=llm_config.fallback_endpoint, fallback_endpoint=llm_config.fallback_endpoint,
@ -80,7 +87,7 @@ def get_llm_client():
elif provider == LLMProvider.OLLAMA: elif provider == LLMProvider.OLLAMA:
if llm_config.llm_api_key is None: if llm_config.llm_api_key is None:
raise InvalidValueError(message="LLM API key is not set.") raise LLMAPIKeyNotSetError()
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import ( from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
GenericAPIAdapter, GenericAPIAdapter,
@ -91,7 +98,7 @@ def get_llm_client():
llm_config.llm_api_key, llm_config.llm_api_key,
llm_config.llm_model, llm_config.llm_model,
"Ollama", "Ollama",
max_tokens=max_tokens, max_completion_tokens=max_completion_tokens,
) )
elif provider == LLMProvider.ANTHROPIC: elif provider == LLMProvider.ANTHROPIC:
@ -99,11 +106,13 @@ def get_llm_client():
AnthropicAdapter, AnthropicAdapter,
) )
return AnthropicAdapter(max_tokens=max_tokens, model=llm_config.llm_model) return AnthropicAdapter(
max_completion_tokens=max_completion_tokens, model=llm_config.llm_model
)
elif provider == LLMProvider.CUSTOM: elif provider == LLMProvider.CUSTOM:
if llm_config.llm_api_key is None: if llm_config.llm_api_key is None:
raise InvalidValueError(message="LLM API key is not set.") raise LLMAPIKeyNotSetError()
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import ( from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
GenericAPIAdapter, GenericAPIAdapter,
@ -114,7 +123,7 @@ def get_llm_client():
llm_config.llm_api_key, llm_config.llm_api_key,
llm_config.llm_model, llm_config.llm_model,
"Custom", "Custom",
max_tokens=max_tokens, max_completion_tokens=max_completion_tokens,
fallback_api_key=llm_config.fallback_api_key, fallback_api_key=llm_config.fallback_api_key,
fallback_endpoint=llm_config.fallback_endpoint, fallback_endpoint=llm_config.fallback_endpoint,
fallback_model=llm_config.fallback_model, fallback_model=llm_config.fallback_model,
@ -122,7 +131,7 @@ def get_llm_client():
elif provider == LLMProvider.GEMINI: elif provider == LLMProvider.GEMINI:
if llm_config.llm_api_key is None: if llm_config.llm_api_key is None:
raise InvalidValueError(message="LLM API key is not set.") raise LLMAPIKeyNotSetError()
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import ( from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import (
GeminiAdapter, GeminiAdapter,
@ -131,11 +140,11 @@ def get_llm_client():
return GeminiAdapter( return GeminiAdapter(
api_key=llm_config.llm_api_key, api_key=llm_config.llm_api_key,
model=llm_config.llm_model, model=llm_config.llm_model,
max_tokens=max_tokens, max_completion_tokens=max_completion_tokens,
endpoint=llm_config.llm_endpoint, endpoint=llm_config.llm_endpoint,
api_version=llm_config.llm_api_version, api_version=llm_config.llm_api_version,
streaming=llm_config.llm_streaming, streaming=llm_config.llm_streaming,
) )
else: else:
raise InvalidValueError(message=f"Unsupported LLM provider: {provider}") raise UnsupportedLLMProviderError(provider)

View file

@ -30,16 +30,18 @@ class OllamaAPIAdapter(LLMInterface):
- model - model
- api_key - api_key
- endpoint - endpoint
- max_tokens - max_completion_tokens
- aclient - aclient
""" """
def __init__(self, endpoint: str, api_key: str, model: str, name: str, max_tokens: int): def __init__(
self, endpoint: str, api_key: str, model: str, name: str, max_completion_tokens: int
):
self.name = name self.name = name
self.model = model self.model = model
self.api_key = api_key self.api_key = api_key
self.endpoint = endpoint self.endpoint = endpoint
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
self.aclient = instructor.from_openai( self.aclient = instructor.from_openai(
OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON
@ -159,7 +161,7 @@ class OllamaAPIAdapter(LLMInterface):
], ],
} }
], ],
max_tokens=300, max_completion_tokens=300,
) )
# Ensure response is valid before accessing .choices[0].message.content # Ensure response is valid before accessing .choices[0].message.content

View file

@ -7,12 +7,14 @@ from openai import ContentFilterFinishReasonError
from litellm.exceptions import ContentPolicyViolationError from litellm.exceptions import ContentPolicyViolationError
from instructor.exceptions import InstructorRetryException from instructor.exceptions import InstructorRetryException
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface, LLMInterface,
) )
from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError from cognee.infrastructure.llm.exceptions import (
ContentPolicyFilterError,
MissingSystemPromptPathError,
)
from cognee.infrastructure.files.utils.open_data_file import open_data_file from cognee.infrastructure.files.utils.open_data_file import open_data_file
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
rate_limit_async, rate_limit_async,
@ -62,7 +64,7 @@ class OpenAIAdapter(LLMInterface):
api_version: str, api_version: str,
model: str, model: str,
transcription_model: str, transcription_model: str,
max_tokens: int, max_completion_tokens: int,
streaming: bool = False, streaming: bool = False,
fallback_model: str = None, fallback_model: str = None,
fallback_api_key: str = None, fallback_api_key: str = None,
@ -75,7 +77,7 @@ class OpenAIAdapter(LLMInterface):
self.api_key = api_key self.api_key = api_key
self.endpoint = endpoint self.endpoint = endpoint
self.api_version = api_version self.api_version = api_version
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
self.streaming = streaming self.streaming = streaming
self.fallback_model = fallback_model self.fallback_model = fallback_model
@ -299,7 +301,7 @@ class OpenAIAdapter(LLMInterface):
api_key=self.api_key, api_key=self.api_key,
api_base=self.endpoint, api_base=self.endpoint,
api_version=self.api_version, api_version=self.api_version,
max_tokens=300, max_completion_tokens=300,
max_retries=self.MAX_RETRIES, max_retries=self.MAX_RETRIES,
) )
@ -308,7 +310,7 @@ class OpenAIAdapter(LLMInterface):
Format and display the prompt for a user query. Format and display the prompt for a user query.
This method formats the prompt using the provided user input and system prompt, This method formats the prompt using the provided user input and system prompt,
returning a string representation. Raises InvalidValueError if the system prompt is not returning a string representation. Raises MissingSystemPromptPathError if the system prompt is not
provided. provided.
Parameters: Parameters:
@ -325,7 +327,7 @@ class OpenAIAdapter(LLMInterface):
if not text_input: if not text_input:
text_input = "No user input provided." text_input = "No user input provided."
if not system_prompt: if not system_prompt:
raise InvalidValueError(message="No system prompt path provided.") raise MissingSystemPromptPathError()
system_prompt = LLMGateway.read_query_prompt(system_prompt) system_prompt = LLMGateway.read_query_prompt(system_prompt)
formatted_prompt = ( formatted_prompt = (

View file

@ -17,10 +17,10 @@ class GeminiTokenizer(TokenizerInterface):
def __init__( def __init__(
self, self,
model: str, model: str,
max_tokens: int = 3072, max_completion_tokens: int = 3072,
): ):
self.model = model self.model = model
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
# Get LLM API key from config # Get LLM API key from config
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config

View file

@ -14,17 +14,17 @@ class HuggingFaceTokenizer(TokenizerInterface):
Instance variables include: Instance variables include:
- model: str - model: str
- max_tokens: int - max_completion_tokens: int
- tokenizer: AutoTokenizer - tokenizer: AutoTokenizer
""" """
def __init__( def __init__(
self, self,
model: str, model: str,
max_tokens: int = 512, max_completion_tokens: int = 512,
): ):
self.model = model self.model = model
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
# Import here to make it an optional dependency # Import here to make it an optional dependency
from transformers import AutoTokenizer from transformers import AutoTokenizer

View file

@ -16,17 +16,17 @@ class MistralTokenizer(TokenizerInterface):
Instance variables include: Instance variables include:
- model: str - model: str
- max_tokens: int - max_completion_tokens: int
- tokenizer: MistralTokenizer - tokenizer: MistralTokenizer
""" """
def __init__( def __init__(
self, self,
model: str, model: str,
max_tokens: int = 3072, max_completion_tokens: int = 3072,
): ):
self.model = model self.model = model
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
# Import here to make it an optional dependency # Import here to make it an optional dependency
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

View file

@ -13,10 +13,10 @@ class TikTokenTokenizer(TokenizerInterface):
def __init__( def __init__(
self, self,
model: Optional[str] = None, model: Optional[str] = None,
max_tokens: int = 8191, max_completion_tokens: int = 8191,
): ):
self.model = model self.model = model
self.max_tokens = max_tokens self.max_completion_tokens = max_completion_tokens
# Initialize TikToken for GPT based on model # Initialize TikToken for GPT based on model
if model: if model:
self.tokenizer = tiktoken.encoding_for_model(self.model) self.tokenizer = tiktoken.encoding_for_model(self.model)
@ -93,9 +93,9 @@ class TikTokenTokenizer(TokenizerInterface):
num_tokens = len(self.tokenizer.encode(text)) num_tokens = len(self.tokenizer.encode(text))
return num_tokens return num_tokens
def trim_text_to_max_tokens(self, text: str) -> str: def trim_text_to_max_completion_tokens(self, text: str) -> str:
""" """
Trim the text so that the number of tokens does not exceed max_tokens. Trim the text so that the number of tokens does not exceed max_completion_tokens.
Parameters: Parameters:
----------- -----------
@ -111,13 +111,13 @@ class TikTokenTokenizer(TokenizerInterface):
num_tokens = self.count_tokens(text) num_tokens = self.count_tokens(text)
# If the number of tokens is within the limit, return the text as is # If the number of tokens is within the limit, return the text as is
if num_tokens <= self.max_tokens: if num_tokens <= self.max_completion_tokens:
return text return text
# If the number exceeds the limit, trim the text # If the number exceeds the limit, trim the text
# This is a simple trim, it may cut words in half; consider using word boundaries for a cleaner cut # This is a simple trim, it may cut words in half; consider using word boundaries for a cleaner cut
encoded_text = self.tokenizer.encode(text) encoded_text = self.tokenizer.encode(text)
trimmed_encoded_text = encoded_text[: self.max_tokens] trimmed_encoded_text = encoded_text[: self.max_completion_tokens]
# Decoding the trimmed text # Decoding the trimmed text
trimmed_text = self.tokenizer.decode(trimmed_encoded_text) trimmed_text = self.tokenizer.decode(trimmed_encoded_text)
return trimmed_text return trimmed_text

View file

@ -32,13 +32,13 @@ def get_max_chunk_tokens():
# We need to make sure chunk size won't take more than half of LLM max context token size # We need to make sure chunk size won't take more than half of LLM max context token size
# but it also can't be bigger than the embedding engine max token size # but it also can't be bigger than the embedding engine max token size
llm_cutoff_point = llm_client.max_tokens // 2 # Round down the division llm_cutoff_point = llm_client.max_completion_tokens // 2 # Round down the division
max_chunk_tokens = min(embedding_engine.max_tokens, llm_cutoff_point) max_chunk_tokens = min(embedding_engine.max_completion_tokens, llm_cutoff_point)
return max_chunk_tokens return max_chunk_tokens
def get_model_max_tokens(model_name: str): def get_model_max_completion_tokens(model_name: str):
""" """
Retrieve the maximum token limit for a specified model name if it exists. Retrieve the maximum token limit for a specified model name if it exists.
@ -56,15 +56,15 @@ def get_model_max_tokens(model_name: str):
Number of max tokens of model, or None if model is unknown Number of max tokens of model, or None if model is unknown
""" """
max_tokens = None max_completion_tokens = None
if model_name in litellm.model_cost: if model_name in litellm.model_cost:
max_tokens = litellm.model_cost[model_name]["max_tokens"] max_completion_tokens = litellm.model_cost[model_name]["max_tokens"]
logger.debug(f"Max input tokens for {model_name}: {max_tokens}") logger.debug(f"Max input tokens for {model_name}: {max_completion_tokens}")
else: else:
logger.info("Model not found in LiteLLM's model_cost.") logger.info("Model not found in LiteLLM's model_cost.")
return max_tokens return max_completion_tokens
async def test_llm_connection(): async def test_llm_connection():

View file

@ -58,7 +58,7 @@ class LoaderInterface(ABC):
pass pass
@abstractmethod @abstractmethod
async def load(self, file_path: str, file_stream: Optional[Any] = None, **kwargs): async def load(self, file_path: str, **kwargs):
""" """
Load and process the file, returning standardized result. Load and process the file, returning standardized result.

View file

@ -1,8 +1,11 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import (
CogneeValidationError,
CogneeConfigurationError,
)
from fastapi import status from fastapi import status
class UnstructuredLibraryImportError(CogneeApiError): class UnstructuredLibraryImportError(CogneeConfigurationError):
def __init__( def __init__(
self, self,
message: str = "Import error. Unstructured library is not installed.", message: str = "Import error. Unstructured library is not installed.",
@ -12,7 +15,7 @@ class UnstructuredLibraryImportError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class UnauthorizedDataAccessError(CogneeApiError): class UnauthorizedDataAccessError(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "User does not have permission to access this data.", message: str = "User does not have permission to access this data.",
@ -22,7 +25,7 @@ class UnauthorizedDataAccessError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class DatasetNotFoundError(CogneeApiError): class DatasetNotFoundError(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "Dataset not found.", message: str = "Dataset not found.",
@ -32,7 +35,7 @@ class DatasetNotFoundError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class DatasetTypeError(CogneeApiError): class DatasetTypeError(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "Dataset type not supported.", message: str = "Dataset type not supported.",
@ -40,3 +43,13 @@ class DatasetTypeError(CogneeApiError):
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
): ):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class InvalidTableAttributeError(CogneeValidationError):
def __init__(
self,
message: str = "The provided data object is missing the required '__tablename__' attribute.",
name: str = "InvalidTableAttributeError",
status_code: int = status.HTTP_400_BAD_REQUEST,
):
super().__init__(message, name, status_code)

View file

@ -1,4 +1,4 @@
from cognee.exceptions import InvalidAttributeError from cognee.modules.data.exceptions.exceptions import InvalidTableAttributeError
from cognee.modules.data.models import Data from cognee.modules.data.models import Data
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
@ -13,9 +13,7 @@ async def delete_data(data: Data):
ValueError: If the data object is invalid. ValueError: If the data object is invalid.
""" """
if not hasattr(data, "__tablename__"): if not hasattr(data, "__tablename__"):
raise InvalidAttributeError( raise InvalidTableAttributeError()
message="The provided data object is missing the required '__tablename__' attribute."
)
db_engine = get_relational_engine() db_engine = get_relational_engine()

View file

@ -1,8 +1,8 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import CogneeSystemError
from fastapi import status from fastapi import status
class PyPdfInternalError(CogneeApiError): class PyPdfInternalError(CogneeSystemError):
"""Internal pypdf error""" """Internal pypdf error"""
def __init__( def __init__(

View file

@ -2,8 +2,11 @@ import time
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from typing import List, Dict, Union, Optional, Type from typing import List, Dict, Union, Optional, Type
from cognee.exceptions import InvalidValueError from cognee.modules.graph.exceptions import (
from cognee.modules.graph.exceptions import EntityNotFoundError, EntityAlreadyExistsError EntityNotFoundError,
EntityAlreadyExistsError,
InvalidDimensionsError,
)
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
@ -66,8 +69,7 @@ class CogneeGraph(CogneeAbstractGraph):
node_name: Optional[List[str]] = None, node_name: Optional[List[str]] = None,
) -> None: ) -> None:
if node_dimension < 1 or edge_dimension < 1: if node_dimension < 1 or edge_dimension < 1:
raise InvalidValueError(message="Dimensions must be positive integers") raise InvalidDimensionsError()
try: try:
import time import time

View file

@ -1,7 +1,6 @@
import numpy as np import numpy as np
from typing import List, Dict, Optional, Any, Union from typing import List, Dict, Optional, Any, Union
from cognee.modules.graph.exceptions import InvalidDimensionsError, DimensionOutOfRangeError
from cognee.exceptions import InvalidValueError
class Node: class Node:
@ -24,7 +23,7 @@ class Node:
self, node_id: str, attributes: Optional[Dict[str, Any]] = None, dimension: int = 1 self, node_id: str, attributes: Optional[Dict[str, Any]] = None, dimension: int = 1
): ):
if dimension <= 0: if dimension <= 0:
raise InvalidValueError(message="Dimension must be a positive integer") raise InvalidDimensionsError()
self.id = node_id self.id = node_id
self.attributes = attributes if attributes is not None else {} self.attributes = attributes if attributes is not None else {}
self.attributes["vector_distance"] = float("inf") self.attributes["vector_distance"] = float("inf")
@ -58,9 +57,7 @@ class Node:
def is_node_alive_in_dimension(self, dimension: int) -> bool: def is_node_alive_in_dimension(self, dimension: int) -> bool:
if dimension < 0 or dimension >= len(self.status): if dimension < 0 or dimension >= len(self.status):
raise InvalidValueError( raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
message=f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}."
)
return self.status[dimension] == 1 return self.status[dimension] == 1
def add_attribute(self, key: str, value: Any) -> None: def add_attribute(self, key: str, value: Any) -> None:
@ -110,7 +107,7 @@ class Edge:
dimension: int = 1, dimension: int = 1,
): ):
if dimension <= 0: if dimension <= 0:
raise InvalidValueError(message="Dimensions must be a positive integer.") raise InvalidDimensionsError()
self.node1 = node1 self.node1 = node1
self.node2 = node2 self.node2 = node2
self.attributes = attributes if attributes is not None else {} self.attributes = attributes if attributes is not None else {}
@ -120,9 +117,7 @@ class Edge:
def is_edge_alive_in_dimension(self, dimension: int) -> bool: def is_edge_alive_in_dimension(self, dimension: int) -> bool:
if dimension < 0 or dimension >= len(self.status): if dimension < 0 or dimension >= len(self.status):
raise InvalidValueError( raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
message=f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}."
)
return self.status[dimension] == 1 return self.status[dimension] == 1
def add_attribute(self, key: str, value: Any) -> None: def add_attribute(self, key: str, value: Any) -> None:

View file

@ -7,4 +7,6 @@ This module defines a set of exceptions for handling various graph errors
from .exceptions import ( from .exceptions import (
EntityNotFoundError, EntityNotFoundError,
EntityAlreadyExistsError, EntityAlreadyExistsError,
InvalidDimensionsError,
DimensionOutOfRangeError,
) )

View file

@ -1,8 +1,8 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import CogneeValidationError
from fastapi import status from fastapi import status
class EntityNotFoundError(CogneeApiError): class EntityNotFoundError(CogneeValidationError):
"""Database returns nothing""" """Database returns nothing"""
def __init__( def __init__(
@ -14,7 +14,7 @@ class EntityNotFoundError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class EntityAlreadyExistsError(CogneeApiError): class EntityAlreadyExistsError(CogneeValidationError):
"""Conflict detected, like trying to create a resource that already exists""" """Conflict detected, like trying to create a resource that already exists"""
def __init__( def __init__(
@ -24,3 +24,25 @@ class EntityAlreadyExistsError(CogneeApiError):
status_code=status.HTTP_409_CONFLICT, status_code=status.HTTP_409_CONFLICT,
): ):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class InvalidDimensionsError(CogneeValidationError):
def __init__(
self,
name: str = "InvalidDimensionsError",
status_code: int = status.HTTP_400_BAD_REQUEST,
):
message = "Dimensions must be positive integers."
super().__init__(message, name, status_code)
class DimensionOutOfRangeError(CogneeValidationError):
def __init__(
self,
dimension: int,
max_index: int,
name: str = "DimensionOutOfRangeError",
status_code: int = status.HTTP_400_BAD_REQUEST,
):
message = f"Dimension {dimension} is out of range. Valid range is 0 to {max_index}."
super().__init__(message, name, status_code)

View file

@ -1,8 +1,8 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import CogneeValidationError
from fastapi import status from fastapi import status
class IngestionError(CogneeApiError): class IngestionError(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "Type of data sent to classify not supported.", message: str = "Type of data sent to classify not supported.",

View file

@ -1,8 +1,8 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import CogneeSystemError
from fastapi import status from fastapi import status
class OntologyInitializationError(CogneeApiError): class OntologyInitializationError(CogneeSystemError):
def __init__( def __init__(
self, self,
message: str = "Ontology initialization failed", message: str = "Ontology initialization failed",
@ -12,7 +12,7 @@ class OntologyInitializationError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class FindClosestMatchError(CogneeApiError): class FindClosestMatchError(CogneeSystemError):
def __init__( def __init__(
self, self,
message: str = "Error in find_closest_match", message: str = "Error in find_closest_match",
@ -22,7 +22,7 @@ class FindClosestMatchError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class GetSubgraphError(CogneeApiError): class GetSubgraphError(CogneeSystemError):
def __init__( def __init__(
self, self,
message: str = "Failed to retrieve subgraph", message: str = "Failed to retrieve subgraph",

View file

@ -1,8 +1,8 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import CogneeSystemError
from fastapi import status from fastapi import status
class PipelineRunFailedError(CogneeApiError): class PipelineRunFailedError(CogneeSystemError):
def __init__( def __init__(
self, self,
message: str = "Pipeline run failed.", message: str = "Pipeline run failed.",

View file

@ -1,6 +1,5 @@
from typing import Any, Optional from typing import Any, Optional
from cognee.infrastructure.databases.graph import get_graph_engine from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter
from cognee.modules.retrieval.base_retriever import BaseRetriever from cognee.modules.retrieval.base_retriever import BaseRetriever
from cognee.modules.retrieval.utils.completion import generate_completion from cognee.modules.retrieval.utils.completion import generate_completion
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported, CypherSearchError from cognee.modules.retrieval.exceptions import SearchTypeNotSupported, CypherSearchError
@ -31,8 +30,7 @@ class CypherSearchRetriever(BaseRetriever):
""" """
Retrieves relevant context using a cypher query. Retrieves relevant context using a cypher query.
If the graph engine is an instance of NetworkXAdapter, raises SearchTypeNotSupported. If If any error occurs during execution, logs the error and raises CypherSearchError.
any error occurs during execution, logs the error and raises CypherSearchError.
Parameters: Parameters:
----------- -----------
@ -46,12 +44,6 @@ class CypherSearchRetriever(BaseRetriever):
""" """
try: try:
graph_engine = await get_graph_engine() graph_engine = await get_graph_engine()
if isinstance(graph_engine, NetworkXAdapter):
raise SearchTypeNotSupported(
"CYPHER search type not supported for NetworkXAdapter."
)
result = await graph_engine.query(query) result = await graph_engine.query(query)
except Exception as e: except Exception as e:
logger.error("Failed to execture cypher search retrieval: %s", str(e)) logger.error("Failed to execture cypher search retrieval: %s", str(e))

View file

@ -1,8 +1,8 @@
from fastapi import status from fastapi import status
from cognee.exceptions import CogneeApiError, CriticalError from cognee.exceptions import CogneeValidationError, CogneeSystemError
class SearchTypeNotSupported(CogneeApiError): class SearchTypeNotSupported(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "CYPHER search type not supported by the adapter.", message: str = "CYPHER search type not supported by the adapter.",
@ -12,7 +12,7 @@ class SearchTypeNotSupported(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class CypherSearchError(CogneeApiError): class CypherSearchError(CogneeSystemError):
def __init__( def __init__(
self, self,
message: str = "An error occurred during the execution of the Cypher query.", message: str = "An error occurred during the execution of the Cypher query.",
@ -22,11 +22,17 @@ class CypherSearchError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class NoDataError(CriticalError): class NoDataError(CogneeValidationError):
message: str = "No data found in the system, please add data first." def __init__(
self,
message: str = "No data found in the system, please add data first.",
name: str = "NoDataError",
status_code: int = status.HTTP_404_NOT_FOUND,
):
super().__init__(message, name, status_code)
class CollectionDistancesNotFoundError(CogneeApiError): class CollectionDistancesNotFoundError(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "No collection distances found for the given query.", message: str = "No collection distances found for the given query.",

View file

@ -1,7 +1,6 @@
from typing import Any, Optional from typing import Any, Optional
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.databases.graph import get_graph_engine from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter
from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.modules.retrieval.base_retriever import BaseRetriever from cognee.modules.retrieval.base_retriever import BaseRetriever
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
@ -123,9 +122,6 @@ class NaturalLanguageRetriever(BaseRetriever):
""" """
graph_engine = await get_graph_engine() graph_engine = await get_graph_engine()
if isinstance(graph_engine, (NetworkXAdapter)):
raise SearchTypeNotSupported("Natural language search type not supported.")
return await self._execute_cypher_query(query, graph_engine) return await self._execute_cypher_query(query, graph_engine)
async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:

View file

@ -0,0 +1,7 @@
"""
Custom exceptions for the Cognee API.
This module defines a set of exceptions for handling various data errors
"""
from .exceptions import UnsupportedSearchTypeError

View file

@ -0,0 +1,15 @@
from cognee.exceptions import (
CogneeValidationError,
)
from fastapi import status
class UnsupportedSearchTypeError(CogneeValidationError):
    """Validation error whose message names the unsupported search type.

    Parameters:
    -----------
        - search_type (str): The search type to echo in the message.
        - name (str): Error name forwarded to the base class.
        - status_code (int): HTTP status forwarded to the base class (400 by default).
    """

    def __init__(
        self,
        search_type: str,
        name: str = "UnsupportedSearchTypeError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ):
        super().__init__(
            message=f"Unsupported search type: {search_type}",
            name=name,
            status_code=status_code,
        )

View file

@ -3,9 +3,8 @@ import json
import asyncio import asyncio
from uuid import UUID from uuid import UUID
from typing import Callable, List, Optional, Type, Union from typing import Callable, List, Optional, Type, Union
from cognee.modules.search.exceptions import UnsupportedSearchTypeError
from cognee.context_global_variables import set_database_global_context_variables from cognee.context_global_variables import set_database_global_context_variables
from cognee.exceptions import InvalidValueError
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
from cognee.modules.retrieval.insights_retriever import InsightsRetriever from cognee.modules.retrieval.insights_retriever import InsightsRetriever
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
@ -143,7 +142,7 @@ async def specific_search(
search_task = search_tasks.get(query_type) search_task = search_tasks.get(query_type)
if search_task is None: if search_task is None:
raise InvalidValueError(message=f"Unsupported search type: {query_type}") raise UnsupportedSearchTypeError(str(query_type))
send_telemetry("cognee.search EXECUTION STARTED", user.id) send_telemetry("cognee.search EXECUTION STARTED", user.id)

View file

@ -88,8 +88,8 @@ def get_settings() -> SettingsDict:
"models": { "models": {
"openai": [ "openai": [
{ {
"value": "gpt-4o-mini", "value": "gpt-5-mini",
"label": "gpt-4o-mini", "label": "gpt-5-mini",
}, },
{ {
"value": "gpt-4o", "value": "gpt-4o",

View file

@ -1,8 +1,8 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import CogneeValidationError
from fastapi import status from fastapi import status
class RoleNotFoundError(CogneeApiError): class RoleNotFoundError(CogneeValidationError):
"""User group not found""" """User group not found"""
def __init__( def __init__(
@ -14,7 +14,7 @@ class RoleNotFoundError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class TenantNotFoundError(CogneeApiError): class TenantNotFoundError(CogneeValidationError):
"""User group not found""" """User group not found"""
def __init__( def __init__(
@ -26,7 +26,7 @@ class TenantNotFoundError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class UserNotFoundError(CogneeApiError): class UserNotFoundError(CogneeValidationError):
"""User not found""" """User not found"""
def __init__( def __init__(
@ -38,7 +38,7 @@ class UserNotFoundError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class PermissionDeniedError(CogneeApiError): class PermissionDeniedError(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "User does not have permission on documents.", message: str = "User does not have permission on documents.",
@ -48,7 +48,7 @@ class PermissionDeniedError(CogneeApiError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class PermissionNotFoundError(CogneeApiError): class PermissionNotFoundError(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "Permission type does not exist.", message: str = "Permission type does not exist.",

View file

@ -1,8 +1,8 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import CogneeValidationError
from fastapi import status from fastapi import status
class IngestionError(CogneeApiError): class IngestionError(CogneeValidationError):
def __init__( def __init__(
self, self,
message: str = "Failed to load data.", message: str = "Failed to load data.",

View file

@ -3,7 +3,6 @@
import os import os
import requests import requests
from datetime import datetime, timezone from datetime import datetime, timezone
import networkx as nx
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import http.server import http.server
import socketserver import socketserver

View file

@ -1,11 +1,11 @@
from cognee.exceptions import CogneeApiError from cognee.exceptions import CogneeValidationError
from fastapi import status from fastapi import status
class NoRelevantDataError(CogneeApiError): class NoRelevantDataError(CogneeValidationError):
""" """
Represents an error when no relevant data is found during a search. This class is a Represents an error when no relevant data is found during a search. This class is a
subclass of CogneeApiError. subclass of CogneeValidationError.
Public methods: Public methods:

View file

@ -10,6 +10,7 @@ from cognee.modules.data.processing.document_types import (
) )
from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.engine.models.node_set import NodeSet
from cognee.modules.engine.utils.generate_node_id import generate_node_id from cognee.modules.engine.utils.generate_node_id import generate_node_id
from cognee.tasks.documents.exceptions import WrongDataDocumentInputError
EXTENSION_TO_DOCUMENT_CLASS = { EXTENSION_TO_DOCUMENT_CLASS = {
"pdf": PdfDocument, # Text documents "pdf": PdfDocument, # Text documents
@ -111,6 +112,9 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]:
- list[Document]: A list of Document objects created based on the classified data - list[Document]: A list of Document objects created based on the classified data
documents. documents.
""" """
if not isinstance(data_documents, list):
raise WrongDataDocumentInputError("data_documents")
documents = [] documents = []
for data_item in data_documents: for data_item in data_documents:
document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension]( document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](

View file

@ -0,0 +1,11 @@
"""
Custom exceptions for the Cognee API.
This module defines a set of exceptions for handling various data errors
"""
from .exceptions import (
WrongDataDocumentInputError,
InvalidChunkSizeError,
InvalidChunkerError,
)

View file

@ -0,0 +1,36 @@
from cognee.exceptions import (
CogneeValidationError,
CogneeConfigurationError,
)
from fastapi import status
class WrongDataDocumentInputError(CogneeValidationError):
    """Raised when a wrong data document is provided.

    Parameters:
    -----------
        - field (str): Name of the offending parameter, echoed in the message.
        - name (str): Error name forwarded to the base class.
        - status_code (int): HTTP status forwarded to the base class (422 by default).
    """

    def __init__(
        self,
        field: str,
        name: str = "WrongDataDocumentInputError",
        status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
    ):
        # Fixed wording: the message previously read "Missing of invalid parameter".
        message = f"Missing or invalid parameter: '{field}'."
        super().__init__(message, name, status_code)
class InvalidChunkSizeError(CogneeValidationError):
    """Validation error stating that max_chunk_size must be a positive integer.

    Parameters:
    -----------
        - value: The rejected max_chunk_size value, echoed in the message.
    """

    def __init__(self, value):
        message = f"max_chunk_size must be a positive integer (got {value})."
        super().__init__(message, "InvalidChunkSizeError", status.HTTP_400_BAD_REQUEST)
class InvalidChunkerError(CogneeValidationError):
    """Validation error stating that chunker must be a valid Chunker class."""

    def __init__(self):
        message = "chunker must be a valid Chunker class."
        super().__init__(message, "InvalidChunkerError", status.HTTP_400_BAD_REQUEST)

View file

@ -8,6 +8,7 @@ from cognee.modules.data.models import Data
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.chunking.TextChunker import TextChunker from cognee.modules.chunking.TextChunker import TextChunker
from cognee.modules.chunking.Chunker import Chunker from cognee.modules.chunking.Chunker import Chunker
from cognee.tasks.documents.exceptions import InvalidChunkSizeError, InvalidChunkerError
async def update_document_token_count(document_id: UUID, token_count: int) -> None: async def update_document_token_count(document_id: UUID, token_count: int) -> None:
@ -37,6 +38,13 @@ async def extract_chunks_from_documents(
- The `read` method of the `Document` class must be implemented to support the chunking operation. - The `read` method of the `Document` class must be implemented to support the chunking operation.
- The `chunker` parameter determines the chunking logic and should align with the document type. - The `chunker` parameter determines the chunking logic and should align with the document type.
""" """
if not isinstance(max_chunk_size, int) or max_chunk_size <= 0:
raise InvalidChunkSizeError(max_chunk_size)
if not isinstance(chunker, type):
raise InvalidChunkerError()
if not hasattr(chunker, "read"):
raise InvalidChunkerError()
for document in documents: for document in documents:
document_token_count = 0 document_token_count = 0
@ -48,5 +56,3 @@ async def extract_chunks_from_documents(
yield document_chunk yield document_chunk
await update_document_token_count(document.id, document_token_count) await update_document_token_count(document.id, document_token_count)
# todo rita

View file

@ -0,0 +1,12 @@
"""
Custom exceptions for the Cognee API.
This module defines a set of exceptions for handling various data errors
"""
from .exceptions import (
InvalidDataChunksError,
InvalidGraphModelError,
InvalidOntologyAdapterError,
InvalidChunkGraphInputError,
)

View file

@ -0,0 +1,41 @@
from cognee.exceptions import (
CogneeValidationError,
CogneeConfigurationError,
)
from fastapi import status
class InvalidDataChunksError(CogneeValidationError):
    """Validation error for invalid data_chunks.

    Parameters:
    -----------
        - detail (str): Explanation appended after "Invalid data_chunks: ".
    """

    def __init__(self, detail: str):
        message = f"Invalid data_chunks: {detail}"
        super().__init__(message, "InvalidDataChunksError", status.HTTP_400_BAD_REQUEST)
class InvalidGraphModelError(CogneeValidationError):
    """Validation error stating that graph_model must be a subclass of BaseModel.

    Parameters:
    -----------
        - got: The rejected graph_model value, echoed in the message.
    """

    def __init__(self, got):
        message = f"graph_model must be a subclass of BaseModel (got {got})."
        super().__init__(message, "InvalidGraphModelError", status.HTTP_400_BAD_REQUEST)
class InvalidOntologyAdapterError(CogneeConfigurationError):
    """Configuration error stating that ontology_adapter lacks the required interface.

    Parameters:
    -----------
        - got: Description of the rejected adapter, echoed in the message.
    """

    def __init__(self, got):
        detail = f"ontology_adapter lacks required interface (got {got})."
        # Keyword arguments are kept, matching how this base class is called in the original.
        super().__init__(
            name="InvalidOntologyAdapterError",
            message=detail,
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
class InvalidChunkGraphInputError(CogneeValidationError):
    """Validation error for invalid chunk inputs or LLM chunk graphs.

    Parameters:
    -----------
        - detail (str): Explanation appended after the fixed message prefix.
    """

    def __init__(self, detail: str):
        message = f"Invalid chunk inputs or LLM Chunkgraphs: {detail}"
        super().__init__(message, "InvalidChunkGraphInputError", status.HTTP_400_BAD_REQUEST)

View file

@ -12,6 +12,12 @@ from cognee.modules.graph.utils import (
) )
from cognee.shared.data_models import KnowledgeGraph from cognee.shared.data_models import KnowledgeGraph
from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.tasks.graph.exceptions import (
InvalidGraphModelError,
InvalidDataChunksError,
InvalidChunkGraphInputError,
InvalidOntologyAdapterError,
)
async def integrate_chunk_graphs( async def integrate_chunk_graphs(
@ -21,6 +27,20 @@ async def integrate_chunk_graphs(
ontology_adapter: OntologyResolver, ontology_adapter: OntologyResolver,
) -> List[DocumentChunk]: ) -> List[DocumentChunk]:
"""Updates DocumentChunk objects, integrates data points and edges into databases.""" """Updates DocumentChunk objects, integrates data points and edges into databases."""
if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
if len(data_chunks) != len(chunk_graphs):
raise InvalidChunkGraphInputError(
f"length mismatch: {len(data_chunks)} chunks vs {len(chunk_graphs)} graphs."
)
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
raise InvalidGraphModelError(graph_model)
if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
raise InvalidOntologyAdapterError(
type(ontology_adapter).__name__ if ontology_adapter else "None"
)
graph_engine = await get_graph_engine() graph_engine = await get_graph_engine()
if graph_model is not KnowledgeGraph: if graph_model is not KnowledgeGraph:
@ -55,6 +75,14 @@ async def extract_graph_from_data(
""" """
Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model. Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
""" """
if not isinstance(data_chunks, list) or not data_chunks:
raise InvalidDataChunksError("must be a non-empty list of DocumentChunk.")
if not all(hasattr(c, "text") for c in data_chunks):
raise InvalidDataChunksError("each chunk must have a 'text' attribute")
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
raise InvalidGraphModelError(graph_model)
chunk_graphs = await asyncio.gather( chunk_graphs = await asyncio.gather(
*[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks] *[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
) )

View file

@ -0,0 +1,8 @@
"""
Custom exceptions for the Cognee API.
This module defines a set of exceptions for handling various application errors,
such as System, Validation, Configuration or TransientErrors
"""
from .exceptions import S3FileSystemNotFoundError

View file

@ -0,0 +1,12 @@
from cognee.exceptions import CogneeSystemError
from fastapi import status
class S3FileSystemNotFoundError(CogneeSystemError):
    """System error carrying the fixed message "Could not find S3FileSystem."."""

    def __init__(
        self,
        name: str = "S3FileSystemNotFoundError",
        status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
    ):
        # The message is not configurable; only name and status code can be overridden.
        super().__init__("Could not find S3FileSystem.", name, status_code)

Some files were not shown because too many files have changed in this diff Show more