Merge branch 'dev' into feature/cog-2734-cognee-feedbacks-interactions-poc-to-prod
This commit is contained in:
commit
d53ebb2164
124 changed files with 1337 additions and 2625 deletions
|
|
@ -6,7 +6,7 @@
|
||||||
# Default graph database : Kuzu
|
# Default graph database : Kuzu
|
||||||
#
|
#
|
||||||
# These default databases are all file-based, so no extra setup is needed
|
# These default databases are all file-based, so no extra setup is needed
|
||||||
# for local use.
|
# for local use. By default, the data is stored inside your .venv directory.
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
@ -16,7 +16,7 @@
|
||||||
STRUCTURED_OUTPUT_FRAMEWORK="instructor"
|
STRUCTURED_OUTPUT_FRAMEWORK="instructor"
|
||||||
|
|
||||||
LLM_API_KEY="your_api_key"
|
LLM_API_KEY="your_api_key"
|
||||||
LLM_MODEL="openai/gpt-4o-mini"
|
LLM_MODEL="openai/gpt-5-mini"
|
||||||
LLM_PROVIDER="openai"
|
LLM_PROVIDER="openai"
|
||||||
LLM_ENDPOINT=""
|
LLM_ENDPOINT=""
|
||||||
LLM_API_VERSION=""
|
LLM_API_VERSION=""
|
||||||
|
|
@ -33,11 +33,20 @@ EMBEDDING_MAX_TOKENS=8191
|
||||||
|
|
||||||
# If using BAML structured output these env variables will be used
|
# If using BAML structured output these env variables will be used
|
||||||
BAML_LLM_PROVIDER=openai
|
BAML_LLM_PROVIDER=openai
|
||||||
BAML_LLM_MODEL="gpt-4o-mini"
|
BAML_LLM_MODEL="gpt-5-mini"
|
||||||
BAML_LLM_ENDPOINT=""
|
BAML_LLM_ENDPOINT=""
|
||||||
BAML_LLM_API_KEY="your_api_key"
|
BAML_LLM_API_KEY="your_api_key"
|
||||||
BAML_LLM_API_VERSION=""
|
BAML_LLM_API_VERSION=""
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# 📂 ROOT DIRECTORY FOR DATABASES
|
||||||
|
################################################################################
|
||||||
|
# Set up the Cognee system directory. Cognee will store system files and databases here.
|
||||||
|
# Useful for setting the root directory inside Docker, and for keeping the databases out of .venv
|
||||||
|
# DATA_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_data/'
|
||||||
|
# SYSTEM_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_system/'
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
# 🗄️ Relational database settings
|
# 🗄️ Relational database settings
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
@ -85,12 +94,7 @@ VECTOR_DB_PROVIDER="lancedb"
|
||||||
VECTOR_DB_URL=
|
VECTOR_DB_URL=
|
||||||
VECTOR_DB_KEY=
|
VECTOR_DB_KEY=
|
||||||
|
|
||||||
################################################################################
|
|
||||||
# 📂 ROOT DIRECTORY IF USING COGNEE LIB INSIDE A DOCKER
|
|
||||||
################################################################################
|
|
||||||
# Set up the Cognee system directory. Cognee will store system files and databases here.
|
|
||||||
# DATA_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_data/'
|
|
||||||
# SYSTEM_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_system/'
|
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
# 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS
|
# 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS
|
||||||
|
|
@ -157,8 +161,8 @@ LITELLM_LOG="ERROR"
|
||||||
# Uncomment + fill values to switch.
|
# Uncomment + fill values to switch.
|
||||||
|
|
||||||
########## Azure OpenAI #######################################################
|
########## Azure OpenAI #######################################################
|
||||||
#LLM_MODEL="azure/gpt-4o-mini"
|
#LLM_MODEL="azure/gpt-5-mini"
|
||||||
#LLM_ENDPOINT="https://DNS.azure.com/openai/deployments/gpt-4o-mini"
|
#LLM_ENDPOINT="https://DNS.azure.com/openai/deployments/gpt-5-mini"
|
||||||
#LLM_API_KEY="<<TALK TO YOUR AZURE GUY>>"
|
#LLM_API_KEY="<<TALK TO YOUR AZURE GUY>>"
|
||||||
#LLM_API_VERSION="2024-12-01-preview"
|
#LLM_API_VERSION="2024-12-01-preview"
|
||||||
|
|
||||||
|
|
|
||||||
31
.github/actions/cognee_setup/action.yml
vendored
31
.github/actions/cognee_setup/action.yml
vendored
|
|
@ -1,11 +1,15 @@
|
||||||
name: cognee-setup
|
name: cognee-setup
|
||||||
description: "Sets up Python, installs Poetry, loads venv from cache, and installs dependencies for Cognee."
|
description: "Sets up Python, installs uv, and installs dependencies for Cognee."
|
||||||
|
|
||||||
inputs:
|
inputs:
|
||||||
python-version:
|
python-version:
|
||||||
description: "Which Python version to use"
|
description: "Which Python version to use"
|
||||||
required: false
|
required: false
|
||||||
default: "3.11.x"
|
default: "3.11.x"
|
||||||
|
extra-dependencies:
|
||||||
|
description: "Additional extra dependencies to install (space-separated)"
|
||||||
|
required: false
|
||||||
|
default: ""
|
||||||
|
|
||||||
runs:
|
runs:
|
||||||
using: "composite"
|
using: "composite"
|
||||||
|
|
@ -16,18 +20,25 @@ runs:
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Install Poetry
|
- name: Install uv
|
||||||
shell: bash
|
uses: astral-sh/setup-uv@v4
|
||||||
run: |
|
with:
|
||||||
python -m pip install --upgrade pip
|
enable-cache: true
|
||||||
pip install poetry
|
|
||||||
|
|
||||||
- name: Rebuild Poetry lock file
|
- name: Rebuild uv lockfile
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
rm poetry.lock
|
rm uv.lock
|
||||||
poetry lock
|
uv lock
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
shell: bash
|
shell: bash
|
||||||
run: poetry install --no-interaction -E api -E docs -E evals -E gemini -E codegraph -E ollama -E dev -E neo4j
|
run: |
|
||||||
|
EXTRA_ARGS=""
|
||||||
|
if [ -n "${{ inputs.extra-dependencies }}" ]; then
|
||||||
|
IFS=' ' read -r -a deps <<< "${{ inputs.extra-dependencies }}"
|
||||||
|
for extra in "${deps[@]}"; do
|
||||||
|
EXTRA_ARGS="$EXTRA_ARGS --extra $extra"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS
|
||||||
|
|
|
||||||
67
.github/actions/setup_neo4j/action.yml
vendored
Normal file
67
.github/actions/setup_neo4j/action.yml
vendored
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
name: 'Setup Neo4j with Graph Data Science'
|
||||||
|
description: 'Sets up a Neo4j instance with APOC and Graph Data Science plugins for testing'
|
||||||
|
inputs:
|
||||||
|
neo4j-version:
|
||||||
|
description: 'Neo4j version to use'
|
||||||
|
required: false
|
||||||
|
default: '5.21'
|
||||||
|
neo4j-password:
|
||||||
|
description: 'Password for Neo4j'
|
||||||
|
required: false
|
||||||
|
default: 'cognee_test_password'
|
||||||
|
outputs:
|
||||||
|
neo4j-url:
|
||||||
|
description: 'Neo4j connection URL'
|
||||||
|
value: 'bolt://localhost:7687'
|
||||||
|
neo4j-username:
|
||||||
|
description: 'Neo4j username'
|
||||||
|
value: 'neo4j'
|
||||||
|
neo4j-password:
|
||||||
|
description: 'Neo4j password'
|
||||||
|
value: ${{ inputs.neo4j-password }}
|
||||||
|
runs:
|
||||||
|
using: 'composite'
|
||||||
|
steps:
|
||||||
|
- name: Start Neo4j with GDS
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
docker run -d \
|
||||||
|
--name neo4j-test \
|
||||||
|
-p 7474:7474 -p 7687:7687 \
|
||||||
|
-e NEO4J_AUTH="neo4j/${{ inputs.neo4j-password }}" \
|
||||||
|
-e NEO4J_PLUGINS='["apoc", "graph-data-science"]' \
|
||||||
|
-e NEO4J_dbms_security_procedures_unrestricted="apoc.*,gds.*" \
|
||||||
|
-e NEO4J_apoc_export_file_enabled=true \
|
||||||
|
-e NEO4J_apoc_import_file_enabled=true \
|
||||||
|
neo4j:${{ inputs.neo4j-version }}
|
||||||
|
|
||||||
|
- name: Wait for Neo4j to be ready
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "Waiting for Neo4j to start..."
|
||||||
|
timeout=60
|
||||||
|
counter=0
|
||||||
|
|
||||||
|
while [ $counter -lt $timeout ]; do
|
||||||
|
if docker exec neo4j-test cypher-shell -u neo4j -p "${{ inputs.neo4j-password }}" "RETURN 1" > /dev/null 2>&1; then
|
||||||
|
echo "Neo4j is ready!"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
echo "Waiting... ($counter/$timeout)"
|
||||||
|
sleep 2
|
||||||
|
counter=$((counter + 2))
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ $counter -ge $timeout ]; then
|
||||||
|
echo "Neo4j failed to start within $timeout seconds"
|
||||||
|
docker logs neo4j-test
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Verify GDS is available
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "Verifying Graph Data Science library is available..."
|
||||||
|
docker exec neo4j-test cypher-shell -u neo4j -p "${{ inputs.neo4j-password }}" \
|
||||||
|
"CALL gds.version() YIELD gdsVersion RETURN gdsVersion"
|
||||||
|
echo "GDS verification complete!"
|
||||||
10
.github/workflows/basic_tests.yml
vendored
10
.github/workflows/basic_tests.yml
vendored
|
|
@ -98,7 +98,7 @@ jobs:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Run Unit Tests
|
- name: Run Unit Tests
|
||||||
run: poetry run pytest cognee/tests/unit/
|
run: uv run pytest cognee/tests/unit/
|
||||||
|
|
||||||
integration-tests:
|
integration-tests:
|
||||||
name: Run Integration Tests
|
name: Run Integration Tests
|
||||||
|
|
@ -115,7 +115,7 @@ jobs:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Run Integration Tests
|
- name: Run Integration Tests
|
||||||
run: poetry run pytest cognee/tests/integration/
|
run: uv run pytest cognee/tests/integration/
|
||||||
|
|
||||||
simple-examples:
|
simple-examples:
|
||||||
name: Run Simple Examples
|
name: Run Simple Examples
|
||||||
|
|
@ -144,7 +144,7 @@ jobs:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Run Simple Examples
|
- name: Run Simple Examples
|
||||||
run: poetry run python ./examples/python/simple_example.py
|
run: uv run python ./examples/python/simple_example.py
|
||||||
|
|
||||||
simple-examples-baml:
|
simple-examples-baml:
|
||||||
name: Run Simple Examples BAML
|
name: Run Simple Examples BAML
|
||||||
|
|
@ -180,7 +180,7 @@ jobs:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Run Simple Examples
|
- name: Run Simple Examples
|
||||||
run: poetry run python ./examples/python/simple_example.py
|
run: uv run python ./examples/python/simple_example.py
|
||||||
|
|
||||||
graph-tests:
|
graph-tests:
|
||||||
name: Run Basic Graph Tests
|
name: Run Basic Graph Tests
|
||||||
|
|
@ -209,4 +209,4 @@ jobs:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Run Graph Tests
|
- name: Run Graph Tests
|
||||||
run: poetry run python ./examples/python/code_graph_example.py --repo_path ./cognee/tasks/graph
|
run: uv run python ./examples/python/code_graph_example.py --repo_path ./cognee/tasks/graph
|
||||||
|
|
|
||||||
20
.github/workflows/db_examples_tests.yml
vendored
20
.github/workflows/db_examples_tests.yml
vendored
|
|
@ -54,10 +54,6 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Install Neo4j extra
|
|
||||||
run: |
|
|
||||||
poetry install -E neo4j
|
|
||||||
|
|
||||||
- name: Run Neo4j Example
|
- name: Run Neo4j Example
|
||||||
env:
|
env:
|
||||||
ENV: dev
|
ENV: dev
|
||||||
|
|
@ -74,7 +70,7 @@ jobs:
|
||||||
GRAPH_DATABASE_USERNAME: "neo4j"
|
GRAPH_DATABASE_USERNAME: "neo4j"
|
||||||
GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
|
GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
|
||||||
run: |
|
run: |
|
||||||
poetry run python examples/database_examples/neo4j_example.py
|
uv run python examples/database_examples/neo4j_example.py
|
||||||
|
|
||||||
run-db-example-kuzu:
|
run-db-example-kuzu:
|
||||||
name: "Kuzu DB Example Test"
|
name: "Kuzu DB Example Test"
|
||||||
|
|
@ -90,9 +86,8 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Install Kuzu extra
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install
|
|
||||||
|
|
||||||
- name: Run Kuzu Example
|
- name: Run Kuzu Example
|
||||||
env:
|
env:
|
||||||
|
|
@ -107,7 +102,7 @@ jobs:
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
GRAPH_DATABASE_PROVIDER: "kuzu"
|
GRAPH_DATABASE_PROVIDER: "kuzu"
|
||||||
run: |
|
run: |
|
||||||
poetry run python examples/database_examples/kuzu_example.py
|
uv run python examples/database_examples/kuzu_example.py
|
||||||
|
|
||||||
run-db-example-pgvector:
|
run-db-example-pgvector:
|
||||||
name: "PostgreSQL PGVector DB Example Test"
|
name: "PostgreSQL PGVector DB Example Test"
|
||||||
|
|
@ -138,10 +133,7 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
extra-dependencies: "postgres"
|
||||||
- name: Install PGVector extra
|
|
||||||
run: |
|
|
||||||
poetry install -E postgres
|
|
||||||
|
|
||||||
- name: Run PGVector Example
|
- name: Run PGVector Example
|
||||||
env:
|
env:
|
||||||
|
|
@ -155,4 +147,4 @@ jobs:
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: |
|
run: |
|
||||||
poetry run python examples/database_examples/pgvector_example.py
|
uv run python examples/database_examples/pgvector_example.py
|
||||||
|
|
|
||||||
44
.github/workflows/e2e_tests.yml
vendored
44
.github/workflows/e2e_tests.yml
vendored
|
|
@ -57,7 +57,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_cognee_server_start.py
|
run: uv run python ./cognee/tests/test_cognee_server_start.py
|
||||||
|
|
||||||
run-telemetry-test:
|
run-telemetry-test:
|
||||||
name: Run Telemetry Test
|
name: Run Telemetry Test
|
||||||
|
|
@ -81,7 +81,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_telemetry.py
|
run: uv run python ./cognee/tests/test_telemetry.py
|
||||||
|
|
||||||
run-telemetry-pipeline-test:
|
run-telemetry-pipeline-test:
|
||||||
name: Run Telemetry Pipeline Test
|
name: Run Telemetry Pipeline Test
|
||||||
|
|
@ -110,7 +110,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_library.py
|
run: uv run python ./cognee/tests/test_library.py
|
||||||
|
|
||||||
run-deduplication-test:
|
run-deduplication-test:
|
||||||
name: Deduplication Test
|
name: Deduplication Test
|
||||||
|
|
@ -140,10 +140,7 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
extra-dependencies: "postgres"
|
||||||
- name: Install specific db dependency
|
|
||||||
run: |
|
|
||||||
poetry install -E postgres
|
|
||||||
|
|
||||||
- name: Run Deduplication Example
|
- name: Run Deduplication Example
|
||||||
env:
|
env:
|
||||||
|
|
@ -154,7 +151,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_deduplication.py
|
run: uv run python ./cognee/tests/test_deduplication.py
|
||||||
|
|
||||||
run-deletion-test:
|
run-deletion-test:
|
||||||
name: Deletion Test
|
name: Deletion Test
|
||||||
|
|
@ -179,7 +176,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_deletion.py
|
run: uv run python ./cognee/tests/test_deletion.py
|
||||||
|
|
||||||
run-s3-bucket-test:
|
run-s3-bucket-test:
|
||||||
name: S3 Bucket Test
|
name: S3 Bucket Test
|
||||||
|
|
@ -192,10 +189,10 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
extra-dependencies: "aws"
|
||||||
|
|
||||||
- name: Install specific S3 dependency
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install -E aws
|
|
||||||
|
|
||||||
- name: Run S3 Bucket Test
|
- name: Run S3 Bucket Test
|
||||||
env:
|
env:
|
||||||
|
|
@ -210,7 +207,7 @@ jobs:
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||||
run: poetry run python ./cognee/tests/test_s3.py
|
run: uv run python ./cognee/tests/test_s3.py
|
||||||
|
|
||||||
test-parallel-databases:
|
test-parallel-databases:
|
||||||
name: Test using different async databases in parallel in Cognee
|
name: Test using different async databases in parallel in Cognee
|
||||||
|
|
@ -224,9 +221,8 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
|
||||||
- name: Install specific graph db dependency
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install
|
|
||||||
|
|
||||||
- name: Run parallel databases test
|
- name: Run parallel databases test
|
||||||
env:
|
env:
|
||||||
|
|
@ -239,7 +235,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_parallel_databases.py
|
run: uv run python ./cognee/tests/test_parallel_databases.py
|
||||||
|
|
||||||
test-permissions:
|
test-permissions:
|
||||||
name: Test permissions with different situations in Cognee
|
name: Test permissions with different situations in Cognee
|
||||||
|
|
@ -253,9 +249,8 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
|
||||||
- name: Install specific graph db dependency
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install
|
|
||||||
|
|
||||||
- name: Run parallel databases test
|
- name: Run parallel databases test
|
||||||
env:
|
env:
|
||||||
|
|
@ -268,7 +263,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_permissions.py
|
run: uv run python ./cognee/tests/test_permissions.py
|
||||||
|
|
||||||
test-graph-edges:
|
test-graph-edges:
|
||||||
name: Test graph edge ingestion
|
name: Test graph edge ingestion
|
||||||
|
|
@ -282,9 +277,8 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
|
||||||
- name: Install specific graph db dependency
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install
|
|
||||||
|
|
||||||
- name: Run graph edges test
|
- name: Run graph edges test
|
||||||
env:
|
env:
|
||||||
|
|
@ -297,4 +291,4 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_edge_ingestion.py
|
run: uv run python ./cognee/tests/test_edge_ingestion.py
|
||||||
|
|
|
||||||
21
.github/workflows/examples_tests.yml
vendored
21
.github/workflows/examples_tests.yml
vendored
|
|
@ -20,7 +20,7 @@ jobs:
|
||||||
env:
|
env:
|
||||||
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
run: poetry run python ./examples/python/multimedia_example.py
|
run: uv run python ./examples/python/multimedia_example.py
|
||||||
|
|
||||||
test-eval-example:
|
test-eval-example:
|
||||||
name: Run Eval Example
|
name: Run Eval Example
|
||||||
|
|
@ -33,10 +33,7 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
extra-dependencies: "deepeval"
|
||||||
- name: Install specific eval dependency
|
|
||||||
run: |
|
|
||||||
poetry install -E deepeval
|
|
||||||
|
|
||||||
- name: Run Evaluation Framework Example
|
- name: Run Evaluation Framework Example
|
||||||
env:
|
env:
|
||||||
|
|
@ -49,7 +46,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/eval_framework/run_eval.py
|
run: uv run python ./cognee/eval_framework/run_eval.py
|
||||||
|
|
||||||
test-descriptive-metrics:
|
test-descriptive-metrics:
|
||||||
name: Run Descriptive Metrics Example
|
name: Run Descriptive Metrics Example
|
||||||
|
|
@ -63,6 +60,10 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
|
||||||
|
- name: Setup Neo4j with GDS
|
||||||
|
uses: ./.github/actions/setup_neo4j
|
||||||
|
id: neo4j
|
||||||
|
|
||||||
- name: Run Descriptive Graph Metrics Example
|
- name: Run Descriptive Graph Metrics Example
|
||||||
env:
|
env:
|
||||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||||
|
|
@ -74,7 +75,11 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py
|
GRAPH_DATABASE_PROVIDER: "neo4j"
|
||||||
|
GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
|
||||||
|
GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
|
||||||
|
GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
|
||||||
|
run: uv run python ./cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py
|
||||||
|
|
||||||
|
|
||||||
test-dynamic-steps-metrics:
|
test-dynamic-steps-metrics:
|
||||||
|
|
@ -100,4 +105,4 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./examples/python/dynamic_steps_example.py
|
run: uv run python ./examples/python/dynamic_steps_example.py
|
||||||
|
|
|
||||||
17
.github/workflows/graph_db_tests.yml
vendored
17
.github/workflows/graph_db_tests.yml
vendored
|
|
@ -28,9 +28,8 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Install specific db dependency
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install
|
|
||||||
|
|
||||||
- name: Run Kuzu Tests
|
- name: Run Kuzu Tests
|
||||||
env:
|
env:
|
||||||
|
|
@ -43,7 +42,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_kuzu.py
|
run: uv run python ./cognee/tests/test_kuzu.py
|
||||||
|
|
||||||
- name: Run Weighted Edges Tests with Kuzu
|
- name: Run Weighted Edges Tests with Kuzu
|
||||||
env:
|
env:
|
||||||
|
|
@ -57,7 +56,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v
|
run: uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v
|
||||||
|
|
||||||
run-neo4j-tests:
|
run-neo4j-tests:
|
||||||
name: Neo4j Tests
|
name: Neo4j Tests
|
||||||
|
|
@ -72,10 +71,6 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Install specific db dependency
|
|
||||||
run: |
|
|
||||||
poetry install -E neo4j
|
|
||||||
|
|
||||||
- name: Run default Neo4j
|
- name: Run default Neo4j
|
||||||
env:
|
env:
|
||||||
ENV: 'dev'
|
ENV: 'dev'
|
||||||
|
|
@ -91,7 +86,7 @@ jobs:
|
||||||
GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
|
GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
|
||||||
GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
|
GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
|
||||||
GRAPH_DATABASE_USERNAME: "neo4j"
|
GRAPH_DATABASE_USERNAME: "neo4j"
|
||||||
run: poetry run python ./cognee/tests/test_neo4j.py
|
run: uv run python ./cognee/tests/test_neo4j.py
|
||||||
|
|
||||||
- name: Run Weighted Edges Tests with Neo4j
|
- name: Run Weighted Edges Tests with Neo4j
|
||||||
env:
|
env:
|
||||||
|
|
@ -108,4 +103,4 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v
|
run: uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v
|
||||||
|
|
|
||||||
10
.github/workflows/python_version_tests.yml
vendored
10
.github/workflows/python_version_tests.yml
vendored
|
|
@ -55,7 +55,7 @@ jobs:
|
||||||
|
|
||||||
- name: Run unit tests
|
- name: Run unit tests
|
||||||
shell: bash
|
shell: bash
|
||||||
run: poetry run pytest cognee/tests/unit/
|
run: uv run pytest cognee/tests/unit/
|
||||||
env:
|
env:
|
||||||
PYTHONUTF8: 1
|
PYTHONUTF8: 1
|
||||||
LLM_PROVIDER: openai
|
LLM_PROVIDER: openai
|
||||||
|
|
@ -73,7 +73,7 @@ jobs:
|
||||||
- name: Run integration tests
|
- name: Run integration tests
|
||||||
if: ${{ !contains(matrix.os, 'windows') }}
|
if: ${{ !contains(matrix.os, 'windows') }}
|
||||||
shell: bash
|
shell: bash
|
||||||
run: poetry run pytest cognee/tests/integration/
|
run: uv run pytest cognee/tests/integration/
|
||||||
env:
|
env:
|
||||||
PYTHONUTF8: 1
|
PYTHONUTF8: 1
|
||||||
LLM_PROVIDER: openai
|
LLM_PROVIDER: openai
|
||||||
|
|
@ -103,11 +103,11 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_library.py
|
run: uv run python ./cognee/tests/test_library.py
|
||||||
|
|
||||||
- name: Build with Poetry
|
- name: Build with uv
|
||||||
shell: bash
|
shell: bash
|
||||||
run: poetry build
|
run: uv build
|
||||||
|
|
||||||
- name: Install Package
|
- name: Install Package
|
||||||
if: ${{ !contains(matrix.os, 'windows') }}
|
if: ${{ !contains(matrix.os, 'windows') }}
|
||||||
|
|
|
||||||
|
|
@ -58,10 +58,10 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
extra-dependencies: "postgres"
|
||||||
|
|
||||||
- name: Install specific db dependency
|
- name: Install specific db dependency
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install -E postgres
|
|
||||||
|
|
||||||
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
|
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
|
||||||
env:
|
env:
|
||||||
|
|
@ -90,7 +90,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_relational_db_migration.py
|
run: uv run python ./cognee/tests/test_relational_db_migration.py
|
||||||
|
|
||||||
run-relational-db-migration-test-kuzu:
|
run-relational-db-migration-test-kuzu:
|
||||||
name: Kuzu Relational DB Migration Test
|
name: Kuzu Relational DB Migration Test
|
||||||
|
|
@ -120,10 +120,10 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
extra-dependencies: "postgres"
|
||||||
|
|
||||||
- name: Install specific db dependency
|
- name: Install specific db dependency
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install -E postgres
|
|
||||||
|
|
||||||
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
|
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
|
||||||
env:
|
env:
|
||||||
|
|
@ -154,7 +154,7 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_relational_db_migration.py
|
run: uv run python ./cognee/tests/test_relational_db_migration.py
|
||||||
|
|
||||||
run-relational-db-migration-test-neo4j:
|
run-relational-db-migration-test-neo4j:
|
||||||
name: Neo4j Relational DB Migration Test
|
name: Neo4j Relational DB Migration Test
|
||||||
|
|
@ -184,10 +184,10 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
|
extra-dependencies: "postgres"
|
||||||
|
|
||||||
- name: Install specific db dependency
|
- name: Install specific db dependency
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install -E postgres -E neo4j
|
|
||||||
|
|
||||||
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
|
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
|
||||||
env:
|
env:
|
||||||
|
|
@ -221,4 +221,4 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_relational_db_migration.py
|
run: uv run python ./cognee/tests/test_relational_db_migration.py
|
||||||
|
|
|
||||||
7
.github/workflows/reusable_notebook.yml
vendored
7
.github/workflows/reusable_notebook.yml
vendored
|
|
@ -46,10 +46,7 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
extra-dependencies: "notebook"
|
||||||
- name: Install specific db dependency
|
|
||||||
run: |
|
|
||||||
poetry install -E notebook
|
|
||||||
|
|
||||||
- name: Execute Jupyter Notebook
|
- name: Execute Jupyter Notebook
|
||||||
env:
|
env:
|
||||||
|
|
@ -64,7 +61,7 @@ jobs:
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: |
|
run: |
|
||||||
poetry run jupyter nbconvert \
|
uv run jupyter nbconvert \
|
||||||
--to notebook \
|
--to notebook \
|
||||||
--execute ${{ inputs.notebook-location }} \
|
--execute ${{ inputs.notebook-location }} \
|
||||||
--output executed_notebook.ipynb \
|
--output executed_notebook.ipynb \
|
||||||
|
|
|
||||||
29
.github/workflows/search_db_tests.yml
vendored
29
.github/workflows/search_db_tests.yml
vendored
|
|
@ -28,9 +28,8 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Install specific db dependency
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install
|
|
||||||
|
|
||||||
- name: Run Kuzu search Tests
|
- name: Run Kuzu search Tests
|
||||||
env:
|
env:
|
||||||
|
|
@ -46,7 +45,7 @@ jobs:
|
||||||
GRAPH_DATABASE_PROVIDER: 'kuzu'
|
GRAPH_DATABASE_PROVIDER: 'kuzu'
|
||||||
VECTOR_DB_PROVIDER: 'lancedb'
|
VECTOR_DB_PROVIDER: 'lancedb'
|
||||||
DB_PROVIDER: 'sqlite'
|
DB_PROVIDER: 'sqlite'
|
||||||
run: poetry run python ./cognee/tests/test_search_db.py
|
run: uv run python ./cognee/tests/test_search_db.py
|
||||||
|
|
||||||
run-neo4j-lance-sqlite-search-tests:
|
run-neo4j-lance-sqlite-search-tests:
|
||||||
name: Search test for Neo4j/LanceDB/Sqlite
|
name: Search test for Neo4j/LanceDB/Sqlite
|
||||||
|
|
@ -78,9 +77,8 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
|
||||||
- name: Install specific db dependency
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install -E neo4j
|
|
||||||
|
|
||||||
- name: Run Neo4j search Tests
|
- name: Run Neo4j search Tests
|
||||||
env:
|
env:
|
||||||
|
|
@ -99,7 +97,7 @@ jobs:
|
||||||
GRAPH_DATABASE_URL: bolt://localhost:7687
|
GRAPH_DATABASE_URL: bolt://localhost:7687
|
||||||
GRAPH_DATABASE_USERNAME: neo4j
|
GRAPH_DATABASE_USERNAME: neo4j
|
||||||
GRAPH_DATABASE_PASSWORD: pleaseletmein
|
GRAPH_DATABASE_PASSWORD: pleaseletmein
|
||||||
run: poetry run python ./cognee/tests/test_search_db.py
|
run: uv run python ./cognee/tests/test_search_db.py
|
||||||
|
|
||||||
run-kuzu-pgvector-postgres-search-tests:
|
run-kuzu-pgvector-postgres-search-tests:
|
||||||
name: Search test for Kuzu/PGVector/Postgres
|
name: Search test for Kuzu/PGVector/Postgres
|
||||||
|
|
@ -129,9 +127,10 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
extra-dependencies: "postgres"
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Dependencies already installed
|
||||||
run: poetry install -E postgres
|
run: echo "Dependencies already installed in setup"
|
||||||
|
|
||||||
- name: Run Kuzu/PGVector/Postgres Tests
|
- name: Run Kuzu/PGVector/Postgres Tests
|
||||||
env:
|
env:
|
||||||
|
|
@ -152,7 +151,7 @@ jobs:
|
||||||
DB_PORT: 5432
|
DB_PORT: 5432
|
||||||
DB_USERNAME: cognee
|
DB_USERNAME: cognee
|
||||||
DB_PASSWORD: cognee
|
DB_PASSWORD: cognee
|
||||||
run: poetry run python ./cognee/tests/test_search_db.py
|
run: uv run python ./cognee/tests/test_search_db.py
|
||||||
|
|
||||||
run-neo4j-pgvector-postgres-search-tests:
|
run-neo4j-pgvector-postgres-search-tests:
|
||||||
name: Search test for Neo4j/PGVector/Postgres
|
name: Search test for Neo4j/PGVector/Postgres
|
||||||
|
|
@ -195,10 +194,10 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
extra-dependencies: "postgres"
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install -E neo4j -E postgres
|
|
||||||
|
|
||||||
- name: Run Neo4j + PGVector + Postgres search Tests
|
- name: Run Neo4j + PGVector + Postgres search Tests
|
||||||
env:
|
env:
|
||||||
|
|
@ -222,4 +221,4 @@ jobs:
|
||||||
DB_PORT: 5432
|
DB_PORT: 5432
|
||||||
DB_USERNAME: cognee
|
DB_USERNAME: cognee
|
||||||
DB_PASSWORD: cognee
|
DB_PASSWORD: cognee
|
||||||
run: poetry run python ./cognee/tests/test_search_db.py
|
run: uv run python ./cognee/tests/test_search_db.py
|
||||||
|
|
|
||||||
2
.github/workflows/test_gemini.yml
vendored
2
.github/workflows/test_gemini.yml
vendored
|
|
@ -26,4 +26,4 @@ jobs:
|
||||||
EMBEDDING_MODEL: "gemini/text-embedding-004"
|
EMBEDDING_MODEL: "gemini/text-embedding-004"
|
||||||
EMBEDDING_DIMENSIONS: "768"
|
EMBEDDING_DIMENSIONS: "768"
|
||||||
EMBEDDING_MAX_TOKENS: "8076"
|
EMBEDDING_MAX_TOKENS: "8076"
|
||||||
run: poetry run python ./examples/python/simple_example.py
|
run: uv run python ./examples/python/simple_example.py
|
||||||
|
|
|
||||||
4
.github/workflows/test_ollama.yml
vendored
4
.github/workflows/test_ollama.yml
vendored
|
|
@ -26,7 +26,7 @@ jobs:
|
||||||
|
|
||||||
- name: Install torch dependency
|
- name: Install torch dependency
|
||||||
run: |
|
run: |
|
||||||
poetry add torch
|
uv add torch
|
||||||
|
|
||||||
# - name: Install ollama
|
# - name: Install ollama
|
||||||
# run: curl -fsSL https://ollama.com/install.sh | sh
|
# run: curl -fsSL https://ollama.com/install.sh | sh
|
||||||
|
|
@ -101,4 +101,4 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: "http://localhost:11434/api/embeddings"
|
EMBEDDING_ENDPOINT: "http://localhost:11434/api/embeddings"
|
||||||
EMBEDDING_DIMENSIONS: "4096"
|
EMBEDDING_DIMENSIONS: "4096"
|
||||||
HUGGINGFACE_TOKENIZER: "Salesforce/SFR-Embedding-Mistral"
|
HUGGINGFACE_TOKENIZER: "Salesforce/SFR-Embedding-Mistral"
|
||||||
run: poetry run python ./examples/python/simple_example.py
|
run: uv run python ./examples/python/simple_example.py
|
||||||
|
|
|
||||||
2
.github/workflows/test_s3_file_storage.yml
vendored
2
.github/workflows/test_s3_file_storage.yml
vendored
|
|
@ -36,4 +36,4 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_s3_file_storage.py
|
run: uv run python ./cognee/tests/test_s3_file_storage.py
|
||||||
|
|
|
||||||
9
.github/workflows/vector_db_tests.yml
vendored
9
.github/workflows/vector_db_tests.yml
vendored
|
|
@ -56,7 +56,7 @@ jobs:
|
||||||
# EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
# EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
# EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
# EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
# EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
# EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
# run: poetry run python ./cognee/tests/test_chromadb.py
|
# run: uv run python ./cognee/tests/test_chromadb.py
|
||||||
|
|
||||||
|
|
||||||
run-postgres-tests:
|
run-postgres-tests:
|
||||||
|
|
@ -87,10 +87,7 @@ jobs:
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
|
extra-dependencies: "postgres"
|
||||||
- name: Install specific db dependency
|
|
||||||
run: |
|
|
||||||
poetry install -E postgres
|
|
||||||
|
|
||||||
- name: Run PGVector Tests
|
- name: Run PGVector Tests
|
||||||
env:
|
env:
|
||||||
|
|
@ -103,4 +100,4 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_pgvector.py
|
run: uv run python ./cognee/tests/test_pgvector.py
|
||||||
|
|
|
||||||
21
.github/workflows/weighted_edges_tests.yml
vendored
21
.github/workflows/weighted_edges_tests.yml
vendored
|
|
@ -31,7 +31,7 @@ jobs:
|
||||||
python-version: ['3.11', '3.12']
|
python-version: ['3.11', '3.12']
|
||||||
env:
|
env:
|
||||||
LLM_PROVIDER: openai
|
LLM_PROVIDER: openai
|
||||||
LLM_MODEL: gpt-4o-mini
|
LLM_MODEL: gpt-5-mini
|
||||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
|
@ -47,11 +47,11 @@ jobs:
|
||||||
|
|
||||||
- name: Run Weighted Edges Unit Tests
|
- name: Run Weighted Edges Unit Tests
|
||||||
run: |
|
run: |
|
||||||
poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short
|
uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short
|
||||||
|
|
||||||
- name: Run Standard Graph Tests (Regression)
|
- name: Run Standard Graph Tests (Regression)
|
||||||
run: |
|
run: |
|
||||||
poetry run pytest cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py -v --tb=short
|
uv run pytest cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py -v --tb=short
|
||||||
|
|
||||||
test-with-different-databases:
|
test-with-different-databases:
|
||||||
name: Test Weighted Edges with Different Graph Databases
|
name: Test Weighted Edges with Different Graph Databases
|
||||||
|
|
@ -61,14 +61,12 @@ jobs:
|
||||||
database: ['kuzu', 'neo4j']
|
database: ['kuzu', 'neo4j']
|
||||||
include:
|
include:
|
||||||
- database: kuzu
|
- database: kuzu
|
||||||
install_extra: ""
|
|
||||||
graph_db_provider: "kuzu"
|
graph_db_provider: "kuzu"
|
||||||
- database: neo4j
|
- database: neo4j
|
||||||
install_extra: "-E neo4j"
|
|
||||||
graph_db_provider: "neo4j"
|
graph_db_provider: "neo4j"
|
||||||
env:
|
env:
|
||||||
LLM_PROVIDER: openai
|
LLM_PROVIDER: openai
|
||||||
LLM_MODEL: gpt-4o-mini
|
LLM_MODEL: gpt-5-mini
|
||||||
LLM_ENDPOINT: https://api.openai.com/v1/
|
LLM_ENDPOINT: https://api.openai.com/v1/
|
||||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||||
LLM_API_VERSION: "2024-02-01"
|
LLM_API_VERSION: "2024-02-01"
|
||||||
|
|
@ -88,22 +86,21 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: '3.11'
|
python-version: '3.11'
|
||||||
|
|
||||||
- name: Install Database Dependencies
|
- name: Dependencies already installed
|
||||||
run: |
|
run: echo "Dependencies already installed in setup"
|
||||||
poetry install ${{ matrix.install_extra }}
|
|
||||||
|
|
||||||
- name: Run Weighted Edges Tests
|
- name: Run Weighted Edges Tests
|
||||||
env:
|
env:
|
||||||
GRAPH_DATABASE_PROVIDER: ${{ matrix.graph_db_provider }}
|
GRAPH_DATABASE_PROVIDER: ${{ matrix.graph_db_provider }}
|
||||||
run: |
|
run: |
|
||||||
poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short
|
uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short
|
||||||
|
|
||||||
test-examples:
|
test-examples:
|
||||||
name: Test Weighted Edges Examples
|
name: Test Weighted Edges Examples
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
env:
|
env:
|
||||||
LLM_PROVIDER: openai
|
LLM_PROVIDER: openai
|
||||||
LLM_MODEL: gpt-4o-mini
|
LLM_MODEL: gpt-5-mini
|
||||||
LLM_ENDPOINT: https://api.openai.com/v1/
|
LLM_ENDPOINT: https://api.openai.com/v1/
|
||||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||||
LLM_API_VERSION: "2024-02-01"
|
LLM_API_VERSION: "2024-02-01"
|
||||||
|
|
@ -125,7 +122,7 @@ jobs:
|
||||||
|
|
||||||
- name: Test Weighted Edges Example
|
- name: Test Weighted Edges Example
|
||||||
run: |
|
run: |
|
||||||
poetry run python examples/python/weighted_edges_example.py
|
uv run python examples/python/weighted_edges_example.py
|
||||||
|
|
||||||
- name: Verify Visualization File Created
|
- name: Verify Visualization File Created
|
||||||
run: |
|
run: |
|
||||||
|
|
|
||||||
|
|
@ -92,7 +92,8 @@ Your contributions are at the core of making this a true open source project. An
|
||||||
## 📦 Installation
|
## 📦 Installation
|
||||||
|
|
||||||
You can install Cognee using **pip**, **poetry**, **uv**, or any other Python package manager.
|
You can install Cognee using **pip**, **poetry**, **uv**, or any other Python package manager.
|
||||||
Cognee supports Python 3.8 to 3.12
|
|
||||||
|
Cognee supports Python 3.10 to 3.13
|
||||||
|
|
||||||
### With pip
|
### With pip
|
||||||
|
|
||||||
|
|
@ -102,7 +103,7 @@ pip install cognee
|
||||||
|
|
||||||
## Local Cognee installation
|
## Local Cognee installation
|
||||||
|
|
||||||
You can install the local Cognee repo using **pip**, **poetry** and **uv**.
|
You can install the local Cognee repo using **uv**, **pip** and **poetry**.
|
||||||
For local pip installation please make sure your pip version is above version 21.3.
|
For local pip installation please make sure your pip version is above version 21.3.
|
||||||
|
|
||||||
### with UV with all optional dependencies
|
### with UV with all optional dependencies
|
||||||
|
|
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 2.2 MiB After Width: | Height: | Size: 63 KiB |
|
|
@ -34,8 +34,8 @@
|
||||||
// };
|
// };
|
||||||
|
|
||||||
// const defaultModel = {
|
// const defaultModel = {
|
||||||
// label: 'gpt-4o-mini',
|
// label: 'gpt-5-mini',
|
||||||
// value: 'gpt-4o-mini',
|
// value: 'gpt-5-mini',
|
||||||
// };
|
// };
|
||||||
|
|
||||||
// export default function Settings({ onDone = () => {}, submitButtonText = 'Save' }) {
|
// export default function Settings({ onDone = () => {}, submitButtonText = 'Save' }) {
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ requires-python = ">=3.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
# For local cognee repo usage remove comment below and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
|
# For local cognee repo usage remove comment below and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
|
||||||
# "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee",
|
# "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee",
|
||||||
"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.1",
|
"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.2",
|
||||||
"fastmcp>=2.10.0,<3.0.0",
|
"fastmcp>=2.10.0,<3.0.0",
|
||||||
"mcp>=1.12.0,<2.0.0",
|
"mcp>=1.12.0,<2.0.0",
|
||||||
"uv>=0.6.3,<1.0.0",
|
"uv>=0.6.3,<1.0.0",
|
||||||
|
|
|
||||||
|
|
@ -221,14 +221,6 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
|
||||||
- The actual cognify process may take significant time depending on text length
|
- The actual cognify process may take significant time depending on text length
|
||||||
- Use the cognify_status tool to check the progress of the operation
|
- Use the cognify_status tool to check the progress of the operation
|
||||||
|
|
||||||
Raises
|
|
||||||
------
|
|
||||||
InvalidValueError
|
|
||||||
If LLM_API_KEY is not set
|
|
||||||
ValueError
|
|
||||||
If chunks exceed max token limits (reduce chunk_size)
|
|
||||||
DatabaseNotCreatedError
|
|
||||||
If databases are not properly initialized
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
async def cognify_task(
|
async def cognify_task(
|
||||||
|
|
@ -512,14 +504,6 @@ async def search(search_query: str, search_type: str) -> list:
|
||||||
- Different search types produce different output formats
|
- Different search types produce different output formats
|
||||||
- The function handles the conversion between Cognee's internal result format and MCP's output format
|
- The function handles the conversion between Cognee's internal result format and MCP's output format
|
||||||
|
|
||||||
Raises
|
|
||||||
------
|
|
||||||
InvalidValueError
|
|
||||||
If LLM_API_KEY is not set (for LLM-based search types)
|
|
||||||
ValueError
|
|
||||||
If query_text is empty or search parameters are invalid
|
|
||||||
NoDataError
|
|
||||||
If no relevant data found for the search query
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
async def search_task(search_query: str, search_type: str) -> str:
|
async def search_task(search_query: str, search_type: str) -> str:
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# In case you choose to use OpenAI provider, just adjust the model and api_key.
|
# In case you choose to use OpenAI provider, just adjust the model and api_key.
|
||||||
LLM_API_KEY=""
|
LLM_API_KEY=""
|
||||||
LLM_MODEL="openai/gpt-4o-mini"
|
LLM_MODEL="openai/gpt-5-mini"
|
||||||
LLM_PROVIDER="openai"
|
LLM_PROVIDER="openai"
|
||||||
# Not needed if you use OpenAI
|
# Not needed if you use OpenAI
|
||||||
LLM_ENDPOINT=""
|
LLM_ENDPOINT=""
|
||||||
|
|
|
||||||
|
|
@ -128,17 +128,12 @@ async def add(
|
||||||
|
|
||||||
Optional:
|
Optional:
|
||||||
- LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
|
- LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
|
||||||
- LLM_MODEL: Model name (default: "gpt-4o-mini")
|
- LLM_MODEL: Model name (default: "gpt-5-mini")
|
||||||
- DEFAULT_USER_EMAIL: Custom default user email
|
- DEFAULT_USER_EMAIL: Custom default user email
|
||||||
- DEFAULT_USER_PASSWORD: Custom default user password
|
- DEFAULT_USER_PASSWORD: Custom default user password
|
||||||
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
|
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
|
||||||
- GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx"
|
- GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
|
||||||
|
|
||||||
Raises:
|
|
||||||
FileNotFoundError: If specified file paths don't exist
|
|
||||||
PermissionError: If user lacks access to files or dataset
|
|
||||||
UnsupportedFileTypeError: If file format cannot be processed
|
|
||||||
InvalidValueError: If LLM_API_KEY is not set or invalid
|
|
||||||
"""
|
"""
|
||||||
tasks = [
|
tasks = [
|
||||||
Task(resolve_data_directories, include_subdirectories=True),
|
Task(resolve_data_directories, include_subdirectories=True),
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,7 @@ async def cognify(
|
||||||
- LangchainChunker: Recursive character splitting with overlap
|
- LangchainChunker: Recursive character splitting with overlap
|
||||||
Determines how documents are segmented for processing.
|
Determines how documents are segmented for processing.
|
||||||
chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
|
chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
|
||||||
Formula: min(embedding_max_tokens, llm_max_tokens // 2)
|
Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
|
||||||
Default limits: ~512-8192 tokens depending on models.
|
Default limits: ~512-8192 tokens depending on models.
|
||||||
Smaller chunks = more granular but potentially fragmented knowledge.
|
Smaller chunks = more granular but potentially fragmented knowledge.
|
||||||
ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
|
ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
|
||||||
|
|
@ -177,14 +177,6 @@ async def cognify(
|
||||||
- LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
|
- LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
|
||||||
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
|
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
|
||||||
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
|
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
|
||||||
|
|
||||||
Raises:
|
|
||||||
DatasetNotFoundError: If specified datasets don't exist
|
|
||||||
PermissionError: If user lacks processing rights
|
|
||||||
InvalidValueError: If LLM_API_KEY is not set
|
|
||||||
OntologyParsingError: If ontology file is malformed
|
|
||||||
ValueError: If chunks exceed max token limits (reduce chunk_size)
|
|
||||||
DatabaseNotCreatedError: If databases are not properly initialized
|
|
||||||
"""
|
"""
|
||||||
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
|
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
from cognee.exceptions import InvalidValueError, InvalidAttributeError
|
|
||||||
from cognee.modules.cognify.config import get_cognify_config
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
||||||
from cognee.infrastructure.databases.vector import get_vectordb_config
|
from cognee.infrastructure.databases.vector import get_vectordb_config
|
||||||
|
|
@ -11,6 +10,7 @@ from cognee.infrastructure.llm.config import (
|
||||||
get_llm_config,
|
get_llm_config,
|
||||||
)
|
)
|
||||||
from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
|
from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
|
||||||
|
from cognee.api.v1.exceptions.exceptions import InvalidConfigAttributeError
|
||||||
|
|
||||||
|
|
||||||
class config:
|
class config:
|
||||||
|
|
@ -92,9 +92,7 @@ class config:
|
||||||
if hasattr(llm_config, key):
|
if hasattr(llm_config, key):
|
||||||
object.__setattr__(llm_config, key, value)
|
object.__setattr__(llm_config, key, value)
|
||||||
else:
|
else:
|
||||||
raise InvalidAttributeError(
|
raise InvalidConfigAttributeError(attribute=key)
|
||||||
message=f"'{key}' is not a valid attribute of the config."
|
|
||||||
)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_chunk_strategy(chunk_strategy: object):
|
def set_chunk_strategy(chunk_strategy: object):
|
||||||
|
|
@ -131,9 +129,7 @@ class config:
|
||||||
if hasattr(relational_db_config, key):
|
if hasattr(relational_db_config, key):
|
||||||
object.__setattr__(relational_db_config, key, value)
|
object.__setattr__(relational_db_config, key, value)
|
||||||
else:
|
else:
|
||||||
raise InvalidAttributeError(
|
raise InvalidConfigAttributeError(attribute=key)
|
||||||
message=f"'{key}' is not a valid attribute of the config."
|
|
||||||
)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_migration_db_config(config_dict: dict):
|
def set_migration_db_config(config_dict: dict):
|
||||||
|
|
@ -145,9 +141,7 @@ class config:
|
||||||
if hasattr(migration_db_config, key):
|
if hasattr(migration_db_config, key):
|
||||||
object.__setattr__(migration_db_config, key, value)
|
object.__setattr__(migration_db_config, key, value)
|
||||||
else:
|
else:
|
||||||
raise InvalidAttributeError(
|
raise InvalidConfigAttributeError(attribute=key)
|
||||||
message=f"'{key}' is not a valid attribute of the config."
|
|
||||||
)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_graph_db_config(config_dict: dict) -> None:
|
def set_graph_db_config(config_dict: dict) -> None:
|
||||||
|
|
@ -171,9 +165,7 @@ class config:
|
||||||
if hasattr(vector_db_config, key):
|
if hasattr(vector_db_config, key):
|
||||||
object.__setattr__(vector_db_config, key, value)
|
object.__setattr__(vector_db_config, key, value)
|
||||||
else:
|
else:
|
||||||
raise InvalidAttributeError(
|
raise InvalidConfigAttributeError(attribute=key)
|
||||||
message=f"'{key}' is not a valid attribute of the config."
|
|
||||||
)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_vector_db_key(db_key: str):
|
def set_vector_db_key(db_key: str):
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
from cognee.modules.data.methods import get_authorized_existing_datasets
|
from cognee.modules.data.methods import get_authorized_existing_datasets
|
||||||
from cognee.modules.data.methods import create_dataset, get_datasets_by_name
|
from cognee.modules.data.methods import create_dataset, get_datasets_by_name
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.api.v1.delete.exceptions import DataNotFoundError, DatasetNotFoundError
|
from cognee.api.v1.exceptions import DataNotFoundError, DatasetNotFoundError
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
from cognee.modules.users.methods import get_authenticated_user
|
from cognee.modules.users.methods import get_authenticated_user
|
||||||
from cognee.modules.users.permissions.methods import (
|
from cognee.modules.users.permissions.methods import (
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ from cognee.modules.users.methods import get_default_user
|
||||||
from cognee.modules.data.methods import get_authorized_existing_datasets
|
from cognee.modules.data.methods import get_authorized_existing_datasets
|
||||||
from cognee.context_global_variables import set_database_global_context_variables
|
from cognee.context_global_variables import set_database_global_context_variables
|
||||||
|
|
||||||
from cognee.api.v1.delete.exceptions import (
|
from cognee.api.v1.exceptions import (
|
||||||
DocumentNotFoundError,
|
DocumentNotFoundError,
|
||||||
DatasetNotFoundError,
|
DatasetNotFoundError,
|
||||||
DocumentSubgraphNotFoundError,
|
DocumentSubgraphNotFoundError,
|
||||||
|
|
|
||||||
13
cognee/api/v1/exceptions/__init__.py
Normal file
13
cognee/api/v1/exceptions/__init__.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
"""
|
||||||
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
|
This module defines exceptions for invalid configuration attributes and for missing documents, datasets, data items, and document subgraphs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .exceptions import (
|
||||||
|
InvalidConfigAttributeError,
|
||||||
|
DocumentNotFoundError,
|
||||||
|
DatasetNotFoundError,
|
||||||
|
DataNotFoundError,
|
||||||
|
DocumentSubgraphNotFoundError,
|
||||||
|
)
|
||||||
|
|
@ -1,10 +1,19 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeConfigurationError, CogneeValidationError
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class DocumentNotFoundError(CogneeApiError):
|
class InvalidConfigAttributeError(CogneeConfigurationError):
|
||||||
"""Raised when a document cannot be found in the database."""
|
def __init__(
|
||||||
|
self,
|
||||||
|
attribute: str,
|
||||||
|
name: str = "InvalidConfigAttributeError",
|
||||||
|
status_code: int = status.HTTP_400_BAD_REQUEST,
|
||||||
|
):
|
||||||
|
message = f"'{attribute}' is not a valid attribute of the configuration."
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentNotFoundError(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Document not found in database.",
|
message: str = "Document not found in database.",
|
||||||
|
|
@ -14,9 +23,7 @@ class DocumentNotFoundError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class DatasetNotFoundError(CogneeApiError):
|
class DatasetNotFoundError(CogneeValidationError):
|
||||||
"""Raised when a dataset cannot be found."""
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Dataset not found.",
|
message: str = "Dataset not found.",
|
||||||
|
|
@ -26,9 +33,7 @@ class DatasetNotFoundError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class DataNotFoundError(CogneeApiError):
|
class DataNotFoundError(CogneeValidationError):
|
||||||
"""Raised when a dataset cannot be found."""
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Data not found.",
|
message: str = "Data not found.",
|
||||||
|
|
@ -38,9 +43,7 @@ class DataNotFoundError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class DocumentSubgraphNotFoundError(CogneeApiError):
|
class DocumentSubgraphNotFoundError(CogneeValidationError):
|
||||||
"""Raised when a document's subgraph cannot be found in the graph database."""
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Document subgraph not found in graph database.",
|
message: str = "Document subgraph not found in graph database.",
|
||||||
|
|
@ -70,7 +70,7 @@ class ResponseRequest(InDTO):
|
||||||
tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto"
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto"
|
||||||
user: Optional[str] = None
|
user: Optional[str] = None
|
||||||
temperature: Optional[float] = 1.0
|
temperature: Optional[float] = 1.0
|
||||||
max_tokens: Optional[int] = None
|
max_completion_tokens: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
class ToolCallOutput(BaseModel):
|
class ToolCallOutput(BaseModel):
|
||||||
|
|
|
||||||
|
|
@ -161,13 +161,6 @@ async def search(
|
||||||
- VECTOR_DB_PROVIDER: Must match what was used during cognify
|
- VECTOR_DB_PROVIDER: Must match what was used during cognify
|
||||||
- GRAPH_DATABASE_PROVIDER: Must match what was used during cognify
|
- GRAPH_DATABASE_PROVIDER: Must match what was used during cognify
|
||||||
|
|
||||||
Raises:
|
|
||||||
DatasetNotFoundError: If specified datasets don't exist or aren't accessible
|
|
||||||
PermissionDeniedError: If user lacks read access to requested datasets
|
|
||||||
NoDataError: If no relevant data found for the search query
|
|
||||||
InvalidValueError: If LLM_API_KEY is not set (for LLM-based search types)
|
|
||||||
ValueError: If query_text is empty or search parameters are invalid
|
|
||||||
CollectionNotFoundError: If vector collection not found (data not processed)
|
|
||||||
"""
|
"""
|
||||||
# We use lists from now on for datasets
|
# We use lists from now on for datasets
|
||||||
if isinstance(datasets, UUID) or isinstance(datasets, str):
|
if isinstance(datasets, UUID) or isinstance(datasets, str):
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ class EvalConfig(BaseSettings):
|
||||||
"EM",
|
"EM",
|
||||||
"f1",
|
"f1",
|
||||||
] # Use only 'correctness' for DirectLLM
|
] # Use only 'correctness' for DirectLLM
|
||||||
deepeval_model: str = "gpt-4o-mini"
|
deepeval_model: str = "gpt-5-mini"
|
||||||
|
|
||||||
# Metrics params
|
# Metrics params
|
||||||
calculate_metrics: bool = True
|
calculate_metrics: bool = True
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,13 @@
|
||||||
Custom exceptions for the Cognee API.
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
This module defines a set of exceptions for handling various application errors,
|
This module defines a set of exceptions for handling various application errors,
|
||||||
such as service failures, resource conflicts, and invalid operations.
|
such as system, validation, configuration, and transient errors.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
CogneeApiError,
|
CogneeApiError,
|
||||||
ServiceError,
|
CogneeSystemError,
|
||||||
InvalidValueError,
|
CogneeValidationError,
|
||||||
InvalidAttributeError,
|
CogneeConfigurationError,
|
||||||
CriticalError,
|
CogneeTransientError,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -35,37 +35,57 @@ class CogneeApiError(Exception):
|
||||||
return f"{self.name}: {self.message} (Status code: {self.status_code})"
|
return f"{self.name}: {self.message} (Status code: {self.status_code})"
|
||||||
|
|
||||||
|
|
||||||
class ServiceError(CogneeApiError):
|
class CogneeSystemError(CogneeApiError):
|
||||||
"""Failures in external services or APIs, like a database or a third-party service"""
|
"""System error"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Service is unavailable.",
|
message: str = "A system error occurred.",
|
||||||
name: str = "ServiceError",
|
name: str = "CogneeSystemError",
|
||||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
|
log=True,
|
||||||
|
log_level="ERROR",
|
||||||
):
|
):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code, log, log_level)
|
||||||
|
|
||||||
|
|
||||||
class InvalidValueError(CogneeApiError):
|
class CogneeValidationError(CogneeApiError):
|
||||||
|
"""Validation error"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Invalid Value.",
|
message: str = "A validation error occurred.",
|
||||||
name: str = "InvalidValueError",
|
name: str = "CogneeValidationError",
|
||||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||||
|
log=True,
|
||||||
|
log_level="ERROR",
|
||||||
):
|
):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code, log, log_level)
|
||||||
|
|
||||||
|
|
||||||
class InvalidAttributeError(CogneeApiError):
|
class CogneeConfigurationError(CogneeApiError):
|
||||||
|
"""Configuration error"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Invalid attribute.",
|
message: str = "A system configuration error occurred.",
|
||||||
name: str = "InvalidAttributeError",
|
name: str = "CogneeConfigurationError",
|
||||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
|
log=True,
|
||||||
|
log_level="ERROR",
|
||||||
):
|
):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code, log, log_level)
|
||||||
|
|
||||||
|
|
||||||
class CriticalError(CogneeApiError):
|
class CogneeTransientError(CogneeApiError):
|
||||||
pass
|
"""Transient error"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
message: str = "A transient error occurred.",
|
||||||
|
name: str = "CogneeTransientError",
|
||||||
|
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||||
|
log=True,
|
||||||
|
log_level="ERROR",
|
||||||
|
):
|
||||||
|
super().__init__(message, name, status_code, log, log_level)
|
||||||
|
|
|
||||||
7
cognee/infrastructure/data/exceptions/__init__.py
Normal file
7
cognee/infrastructure/data/exceptions/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
"""
|
||||||
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
|
This module defines a set of exceptions for handling various data errors
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .exceptions import KeywordExtractionError
|
||||||
22
cognee/infrastructure/data/exceptions/exceptions.py
Normal file
22
cognee/infrastructure/data/exceptions/exceptions.py
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
from cognee.exceptions import (
|
||||||
|
CogneeValidationError,
|
||||||
|
)
|
||||||
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
|
class KeywordExtractionError(CogneeValidationError):
|
||||||
|
"""
|
||||||
|
Raised when a provided value is syntactically valid but semantically unacceptable
|
||||||
|
for the given operation.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
- Passing an empty string to a keyword extraction function.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
message: str = "Extract_keywords cannot extract keywords from empty text.",
|
||||||
|
name: str = "KeywordExtractionError",
|
||||||
|
status_code: int = status.HTTP_400_BAD_REQUEST,
|
||||||
|
):
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError
|
||||||
from cognee.shared.utils import extract_pos_tags
|
from cognee.shared.utils import extract_pos_tags
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -8,7 +8,7 @@ def extract_keywords(text: str) -> list[str]:
|
||||||
"""
|
"""
|
||||||
Extract keywords from the provided text string.
|
Extract keywords from the provided text string.
|
||||||
|
|
||||||
This function raises an InvalidValueError if the input text is empty. It processes the
|
This function raises a KeywordExtractionError if the input text is empty. It processes the
|
||||||
text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
|
text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
|
||||||
relevant keywords based on their frequency. The function returns a list of up to 15
|
relevant keywords based on their frequency. The function returns a list of up to 15
|
||||||
keywords, each having more than 3 characters.
|
keywords, each having more than 3 characters.
|
||||||
|
|
@ -25,7 +25,7 @@ def extract_keywords(text: str) -> list[str]:
|
||||||
with more than 3 characters.
|
with more than 3 characters.
|
||||||
"""
|
"""
|
||||||
if len(text) == 0:
|
if len(text) == 0:
|
||||||
raise InvalidValueError(message="extract_keywords cannot extract keywords from empty text.")
|
raise KeywordExtractionError()
|
||||||
|
|
||||||
tags = extract_pos_tags(text)
|
tags = extract_pos_tags(text)
|
||||||
nouns = [word for (word, tag) in tags if tag == "NN"]
|
nouns = [word for (word, tag) in tags if tag == "NN"]
|
||||||
|
|
|
||||||
|
|
@ -1,20 +0,0 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
|
||||||
from fastapi import status
|
|
||||||
|
|
||||||
|
|
||||||
class EmbeddingException(CogneeApiError):
|
|
||||||
"""
|
|
||||||
Custom exception for handling embedding-related errors.
|
|
||||||
|
|
||||||
This exception class is designed to indicate issues specifically related to embeddings
|
|
||||||
within the application. It extends the base exception class CogneeApiError and allows
|
|
||||||
for customization of the error message, name, and status code.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
message: str = "Embedding Exception.",
|
|
||||||
name: str = "EmbeddingException",
|
|
||||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
|
||||||
):
|
|
||||||
super().__init__(message, name, status_code)
|
|
||||||
|
|
@ -8,4 +8,7 @@ from .exceptions import (
|
||||||
EntityNotFoundError,
|
EntityNotFoundError,
|
||||||
EntityAlreadyExistsError,
|
EntityAlreadyExistsError,
|
||||||
DatabaseNotCreatedError,
|
DatabaseNotCreatedError,
|
||||||
|
EmbeddingException,
|
||||||
|
MissingQueryParameterError,
|
||||||
|
MutuallyExclusiveQueryParametersError,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,13 @@
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
from cognee.exceptions import CogneeApiError, CriticalError
|
from cognee.exceptions import CogneeSystemError, CogneeValidationError, CogneeConfigurationError
|
||||||
|
|
||||||
|
|
||||||
class DatabaseNotCreatedError(CriticalError):
|
class DatabaseNotCreatedError(CogneeSystemError):
|
||||||
"""
|
"""
|
||||||
Represents an error indicating that the database has not been created. This error should
|
Represents an error indicating that the database has not been created. This error should
|
||||||
be raised when an attempt is made to access the database before it has been initialized.
|
be raised when an attempt is made to access the database before it has been initialized.
|
||||||
|
|
||||||
Inherits from CriticalError. Overrides the constructor to include a default message and
|
Inherits from CogneeSystemError. Overrides the constructor to include a default message and
|
||||||
status code.
|
status code.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -20,10 +20,10 @@ class DatabaseNotCreatedError(CriticalError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class EntityNotFoundError(CogneeApiError):
|
class EntityNotFoundError(CogneeValidationError):
|
||||||
"""
|
"""
|
||||||
Represents an error when a requested entity is not found in the database. This class
|
Represents an error when a requested entity is not found in the database. This class
|
||||||
inherits from CogneeApiError.
|
inherits from CogneeValidationError.
|
||||||
|
|
||||||
Public methods:
|
Public methods:
|
||||||
|
|
||||||
|
|
@ -49,11 +49,11 @@ class EntityNotFoundError(CogneeApiError):
|
||||||
# super().__init__(message, name, status_code) :TODO: This is not an error anymore with the dynamic exception handling therefore we shouldn't log error
|
# super().__init__(message, name, status_code) :TODO: This is not an error anymore with the dynamic exception handling therefore we shouldn't log error
|
||||||
|
|
||||||
|
|
||||||
class EntityAlreadyExistsError(CogneeApiError):
|
class EntityAlreadyExistsError(CogneeValidationError):
|
||||||
"""
|
"""
|
||||||
Represents an error when an entity creation is attempted but the entity already exists.
|
Represents an error when an entity creation is attempted but the entity already exists.
|
||||||
|
|
||||||
This class is derived from CogneeApiError and is used to signal a conflict in operations
|
This class is derived from CogneeValidationError and is used to signal a conflict in operations
|
||||||
involving resource creation.
|
involving resource creation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -66,11 +66,11 @@ class EntityAlreadyExistsError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NodesetFilterNotSupportedError(CogneeApiError):
|
class NodesetFilterNotSupportedError(CogneeConfigurationError):
|
||||||
"""
|
"""
|
||||||
Raise an exception when a nodeset filter is not supported by the current database.
|
Raise an exception when a nodeset filter is not supported by the current database.
|
||||||
|
|
||||||
This exception inherits from `CogneeApiError` and is designed to provide information
|
This exception inherits from `CogneeConfigurationError` and is designed to provide information
|
||||||
about the specific issue of unsupported nodeset filters in the context of graph
|
about the specific issue of unsupported nodeset filters in the context of graph
|
||||||
databases.
|
databases.
|
||||||
"""
|
"""
|
||||||
|
|
@ -84,3 +84,51 @@ class NodesetFilterNotSupportedError(CogneeApiError):
|
||||||
self.message = message
|
self.message = message
|
||||||
self.name = name
|
self.name = name
|
||||||
self.status_code = status_code
|
self.status_code = status_code
|
||||||
|
|
||||||
|
|
||||||
|
class EmbeddingException(CogneeConfigurationError):
|
||||||
|
"""
|
||||||
|
Custom exception for handling embedding-related errors.
|
||||||
|
|
||||||
|
This exception class is designed to indicate issues specifically related to embeddings
|
||||||
|
within the application. It extends the base exception class CogneeConfigurationError and allows
|
||||||
|
for customization of the error message, name, and status code.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
message: str = "Embedding Exception.",
|
||||||
|
name: str = "EmbeddingException",
|
||||||
|
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||||
|
):
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
|
class MissingQueryParameterError(CogneeValidationError):
|
||||||
|
"""
|
||||||
|
Raised when neither 'query_text' nor 'query_vector' is provided,
|
||||||
|
and at least one is required to perform the operation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
name: str = "MissingQueryParameterError",
|
||||||
|
status_code: int = status.HTTP_400_BAD_REQUEST,
|
||||||
|
):
|
||||||
|
message = "One of query_text or query_vector must be provided!"
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
|
class MutuallyExclusiveQueryParametersError(CogneeValidationError):
|
||||||
|
"""
|
||||||
|
Raised when both 'query_text' and 'query_vector' are provided to the search function,
|
||||||
|
but only one type of input is allowed at a time.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
name: str = "MutuallyExclusiveQueryParametersError",
|
||||||
|
status_code: int = status.HTTP_400_BAD_REQUEST,
|
||||||
|
):
|
||||||
|
message = "The search function accepts either text or embedding as input, but not both."
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
|
||||||
|
|
@ -21,10 +21,6 @@ async def get_graph_engine() -> GraphDBInterface:
|
||||||
if hasattr(graph_client, "initialize"):
|
if hasattr(graph_client, "initialize"):
|
||||||
await graph_client.initialize()
|
await graph_client.initialize()
|
||||||
|
|
||||||
# Handle loading of graph for NetworkX
|
|
||||||
if config["graph_database_provider"].lower() == "networkx" and graph_client.graph is None:
|
|
||||||
await graph_client.load_graph_from_file()
|
|
||||||
|
|
||||||
return graph_client
|
return graph_client
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -181,8 +177,7 @@ def create_graph_engine(
|
||||||
graph_id=graph_identifier,
|
graph_id=graph_identifier,
|
||||||
)
|
)
|
||||||
|
|
||||||
from .networkx.adapter import NetworkXAdapter
|
raise EnvironmentError(
|
||||||
|
f"Unsupported graph database provider: {graph_database_provider}. "
|
||||||
graph_client = NetworkXAdapter(filename=graph_file_path)
|
f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'falkordb', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
|
||||||
|
)
|
||||||
return graph_client
|
|
||||||
|
|
|
||||||
|
|
@ -3,11 +3,16 @@
|
||||||
This module defines custom exceptions for Neptune Analytics operations.
|
This module defines custom exceptions for Neptune Analytics operations.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import (
|
||||||
|
CogneeSystemError,
|
||||||
|
CogneeTransientError,
|
||||||
|
CogneeValidationError,
|
||||||
|
CogneeConfigurationError,
|
||||||
|
)
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class NeptuneAnalyticsError(CogneeApiError):
|
class NeptuneAnalyticsError(CogneeSystemError):
|
||||||
"""Base exception for Neptune Analytics operations."""
|
"""Base exception for Neptune Analytics operations."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -19,7 +24,7 @@ class NeptuneAnalyticsError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NeptuneAnalyticsConnectionError(NeptuneAnalyticsError):
|
class NeptuneAnalyticsConnectionError(CogneeTransientError):
|
||||||
"""Exception raised when connection to Neptune Analytics fails."""
|
"""Exception raised when connection to Neptune Analytics fails."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -31,7 +36,7 @@ class NeptuneAnalyticsConnectionError(NeptuneAnalyticsError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NeptuneAnalyticsQueryError(NeptuneAnalyticsError):
|
class NeptuneAnalyticsQueryError(CogneeValidationError):
|
||||||
"""Exception raised when a query execution fails."""
|
"""Exception raised when a query execution fails."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -43,7 +48,7 @@ class NeptuneAnalyticsQueryError(NeptuneAnalyticsError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NeptuneAnalyticsAuthenticationError(NeptuneAnalyticsError):
|
class NeptuneAnalyticsAuthenticationError(CogneeConfigurationError):
|
||||||
"""Exception raised when authentication with Neptune Analytics fails."""
|
"""Exception raised when authentication with Neptune Analytics fails."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -55,7 +60,7 @@ class NeptuneAnalyticsAuthenticationError(NeptuneAnalyticsError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NeptuneAnalyticsConfigurationError(NeptuneAnalyticsError):
|
class NeptuneAnalyticsConfigurationError(CogneeConfigurationError):
|
||||||
"""Exception raised when Neptune Analytics configuration is invalid."""
|
"""Exception raised when Neptune Analytics configuration is invalid."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -67,7 +72,7 @@ class NeptuneAnalyticsConfigurationError(NeptuneAnalyticsError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NeptuneAnalyticsTimeoutError(NeptuneAnalyticsError):
|
class NeptuneAnalyticsTimeoutError(CogneeTransientError):
|
||||||
"""Exception raised when a Neptune Analytics operation times out."""
|
"""Exception raised when a Neptune Analytics operation times out."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -79,7 +84,7 @@ class NeptuneAnalyticsTimeoutError(NeptuneAnalyticsError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NeptuneAnalyticsThrottlingError(NeptuneAnalyticsError):
|
class NeptuneAnalyticsThrottlingError(CogneeTransientError):
|
||||||
"""Exception raised when requests are throttled by Neptune Analytics."""
|
"""Exception raised when requests are throttled by Neptune Analytics."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -91,7 +96,7 @@ class NeptuneAnalyticsThrottlingError(NeptuneAnalyticsError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NeptuneAnalyticsResourceNotFoundError(NeptuneAnalyticsError):
|
class NeptuneAnalyticsResourceNotFoundError(CogneeValidationError):
|
||||||
"""Exception raised when a Neptune Analytics resource is not found."""
|
"""Exception raised when a Neptune Analytics resource is not found."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -103,7 +108,7 @@ class NeptuneAnalyticsResourceNotFoundError(NeptuneAnalyticsError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NeptuneAnalyticsInvalidParameterError(NeptuneAnalyticsError):
|
class NeptuneAnalyticsInvalidParameterError(CogneeValidationError):
|
||||||
"""Exception raised when invalid parameters are provided to Neptune Analytics."""
|
"""Exception raised when invalid parameters are provided to Neptune Analytics."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -9,7 +9,7 @@ from typing import List, Dict, Any, Optional, Tuple, Type, Union
|
||||||
|
|
||||||
from falkordb import FalkorDB
|
from falkordb import FalkorDB
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
|
||||||
from cognee.infrastructure.databases.graph.graph_db_interface import (
|
from cognee.infrastructure.databases.graph.graph_db_interface import (
|
||||||
GraphDBInterface,
|
GraphDBInterface,
|
||||||
record_graph_changes,
|
record_graph_changes,
|
||||||
|
|
@ -721,7 +721,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
Returns the search results as a result set from the graph database.
|
Returns the search results as a result set from the graph database.
|
||||||
"""
|
"""
|
||||||
if query_text is None and query_vector is None:
|
if query_text is None and query_vector is None:
|
||||||
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
|
raise MissingQueryParameterError()
|
||||||
|
|
||||||
if query_text and not query_vector:
|
if query_text and not query_vector:
|
||||||
query_vector = (await self.embed_data([query_text]))[0]
|
query_vector = (await self.embed_data([query_text]))[0]
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ import json
|
||||||
from typing import List, Optional, Any, Dict, Type, Tuple
|
from typing import List, Optional, Any, Dict, Type, Tuple
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
|
||||||
|
from cognee.infrastructure.databases.exceptions import MutuallyExclusiveQueryParametersError
|
||||||
from cognee.infrastructure.databases.graph.neptune_driver.adapter import NeptuneGraphDB
|
from cognee.infrastructure.databases.graph.neptune_driver.adapter import NeptuneGraphDB
|
||||||
from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface
|
from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface
|
||||||
from cognee.infrastructure.engine import DataPoint
|
from cognee.infrastructure.engine import DataPoint
|
||||||
|
|
@ -274,11 +275,9 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
|
||||||
limit = self._TOPK_UPPER_BOUND
|
limit = self._TOPK_UPPER_BOUND
|
||||||
|
|
||||||
if query_vector and query_text:
|
if query_vector and query_text:
|
||||||
raise InvalidValueError(
|
raise MutuallyExclusiveQueryParametersError()
|
||||||
message="The search function accepts either text or embedding as input, but not both."
|
|
||||||
)
|
|
||||||
elif query_text is None and query_vector is None:
|
elif query_text is None and query_vector is None:
|
||||||
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
|
raise MissingQueryParameterError()
|
||||||
elif query_vector:
|
elif query_vector:
|
||||||
embedding = query_vector
|
embedding = query_vector
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,13 @@ from uuid import UUID
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
from chromadb import AsyncHttpClient, Settings
|
from chromadb import AsyncHttpClient, Settings
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.modules.storage.utils import get_own_properties
|
from cognee.modules.storage.utils import get_own_properties
|
||||||
from cognee.infrastructure.engine import DataPoint
|
from cognee.infrastructure.engine import DataPoint
|
||||||
from cognee.infrastructure.engine.utils import parse_id
|
from cognee.infrastructure.engine.utils import parse_id
|
||||||
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
||||||
from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
|
from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
|
||||||
|
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
|
||||||
|
|
||||||
from ..embeddings.EmbeddingEngine import EmbeddingEngine
|
from ..embeddings.EmbeddingEngine import EmbeddingEngine
|
||||||
from ..vector_db_interface import VectorDBInterface
|
from ..vector_db_interface import VectorDBInterface
|
||||||
|
|
@ -378,7 +378,7 @@ class ChromaDBAdapter(VectorDBInterface):
|
||||||
Returns a list of ScoredResult instances representing the search results.
|
Returns a list of ScoredResult instances representing the search results.
|
||||||
"""
|
"""
|
||||||
if query_text is None and query_vector is None:
|
if query_text is None and query_vector is None:
|
||||||
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
|
raise MissingQueryParameterError()
|
||||||
|
|
||||||
if query_text and not query_vector:
|
if query_text and not query_vector:
|
||||||
query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
|
query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
|
||||||
|
|
|
||||||
|
|
@ -41,11 +41,11 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
|
||||||
self,
|
self,
|
||||||
model: Optional[str] = "openai/text-embedding-3-large",
|
model: Optional[str] = "openai/text-embedding-3-large",
|
||||||
dimensions: Optional[int] = 3072,
|
dimensions: Optional[int] = 3072,
|
||||||
max_tokens: int = 512,
|
max_completion_tokens: int = 512,
|
||||||
):
|
):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.dimensions = dimensions
|
self.dimensions = dimensions
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
self.tokenizer = self.get_tokenizer()
|
self.tokenizer = self.get_tokenizer()
|
||||||
# self.retry_count = 0
|
# self.retry_count = 0
|
||||||
self.embedding_model = TextEmbedding(model_name=model)
|
self.embedding_model = TextEmbedding(model_name=model)
|
||||||
|
|
@ -112,7 +112,9 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
|
||||||
"""
|
"""
|
||||||
logger.debug("Loading tokenizer for FastembedEmbeddingEngine...")
|
logger.debug("Loading tokenizer for FastembedEmbeddingEngine...")
|
||||||
|
|
||||||
tokenizer = TikTokenTokenizer(model="gpt-4o", max_tokens=self.max_tokens)
|
tokenizer = TikTokenTokenizer(
|
||||||
|
model="gpt-4o", max_completion_tokens=self.max_completion_tokens
|
||||||
|
)
|
||||||
|
|
||||||
logger.debug("Tokenizer loaded for for FastembedEmbeddingEngine")
|
logger.debug("Tokenizer loaded for for FastembedEmbeddingEngine")
|
||||||
return tokenizer
|
return tokenizer
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import math
|
||||||
import litellm
|
import litellm
|
||||||
import os
|
import os
|
||||||
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
|
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
|
||||||
from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException
|
from cognee.infrastructure.databases.exceptions import EmbeddingException
|
||||||
from cognee.infrastructure.llm.tokenizer.Gemini import (
|
from cognee.infrastructure.llm.tokenizer.Gemini import (
|
||||||
GeminiTokenizer,
|
GeminiTokenizer,
|
||||||
)
|
)
|
||||||
|
|
@ -57,7 +57,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
||||||
api_key: str = None,
|
api_key: str = None,
|
||||||
endpoint: str = None,
|
endpoint: str = None,
|
||||||
api_version: str = None,
|
api_version: str = None,
|
||||||
max_tokens: int = 512,
|
max_completion_tokens: int = 512,
|
||||||
):
|
):
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
self.endpoint = endpoint
|
self.endpoint = endpoint
|
||||||
|
|
@ -65,7 +65,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
||||||
self.provider = provider
|
self.provider = provider
|
||||||
self.model = model
|
self.model = model
|
||||||
self.dimensions = dimensions
|
self.dimensions = dimensions
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
self.tokenizer = self.get_tokenizer()
|
self.tokenizer = self.get_tokenizer()
|
||||||
self.retry_count = 0
|
self.retry_count = 0
|
||||||
|
|
||||||
|
|
@ -179,20 +179,29 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
||||||
model = self.model.split("/")[-1]
|
model = self.model.split("/")[-1]
|
||||||
|
|
||||||
if "openai" in self.provider.lower():
|
if "openai" in self.provider.lower():
|
||||||
tokenizer = TikTokenTokenizer(model=model, max_tokens=self.max_tokens)
|
tokenizer = TikTokenTokenizer(
|
||||||
|
model=model, max_completion_tokens=self.max_completion_tokens
|
||||||
|
)
|
||||||
elif "gemini" in self.provider.lower():
|
elif "gemini" in self.provider.lower():
|
||||||
tokenizer = GeminiTokenizer(model=model, max_tokens=self.max_tokens)
|
tokenizer = GeminiTokenizer(
|
||||||
|
model=model, max_completion_tokens=self.max_completion_tokens
|
||||||
|
)
|
||||||
elif "mistral" in self.provider.lower():
|
elif "mistral" in self.provider.lower():
|
||||||
tokenizer = MistralTokenizer(model=model, max_tokens=self.max_tokens)
|
tokenizer = MistralTokenizer(
|
||||||
|
model=model, max_completion_tokens=self.max_completion_tokens
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
tokenizer = HuggingFaceTokenizer(
|
tokenizer = HuggingFaceTokenizer(
|
||||||
model=self.model.replace("hosted_vllm/", ""), max_tokens=self.max_tokens
|
model=self.model.replace("hosted_vllm/", ""),
|
||||||
|
max_completion_tokens=self.max_completion_tokens,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not get tokenizer from HuggingFace due to: {e}")
|
logger.warning(f"Could not get tokenizer from HuggingFace due to: {e}")
|
||||||
logger.info("Switching to TikToken default tokenizer.")
|
logger.info("Switching to TikToken default tokenizer.")
|
||||||
tokenizer = TikTokenTokenizer(model=None, max_tokens=self.max_tokens)
|
tokenizer = TikTokenTokenizer(
|
||||||
|
model=None, max_completion_tokens=self.max_completion_tokens
|
||||||
|
)
|
||||||
|
|
||||||
logger.debug(f"Tokenizer loaded for model: {self.model}")
|
logger.debug(f"Tokenizer loaded for model: {self.model}")
|
||||||
return tokenizer
|
return tokenizer
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
|
||||||
Instance variables:
|
Instance variables:
|
||||||
- model
|
- model
|
||||||
- dimensions
|
- dimensions
|
||||||
- max_tokens
|
- max_completion_tokens
|
||||||
- endpoint
|
- endpoint
|
||||||
- mock
|
- mock
|
||||||
- huggingface_tokenizer_name
|
- huggingface_tokenizer_name
|
||||||
|
|
@ -39,7 +39,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
|
||||||
|
|
||||||
model: str
|
model: str
|
||||||
dimensions: int
|
dimensions: int
|
||||||
max_tokens: int
|
max_completion_tokens: int
|
||||||
endpoint: str
|
endpoint: str
|
||||||
mock: bool
|
mock: bool
|
||||||
huggingface_tokenizer_name: str
|
huggingface_tokenizer_name: str
|
||||||
|
|
@ -50,13 +50,13 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
|
||||||
self,
|
self,
|
||||||
model: Optional[str] = "avr/sfr-embedding-mistral:latest",
|
model: Optional[str] = "avr/sfr-embedding-mistral:latest",
|
||||||
dimensions: Optional[int] = 1024,
|
dimensions: Optional[int] = 1024,
|
||||||
max_tokens: int = 512,
|
max_completion_tokens: int = 512,
|
||||||
endpoint: Optional[str] = "http://localhost:11434/api/embeddings",
|
endpoint: Optional[str] = "http://localhost:11434/api/embeddings",
|
||||||
huggingface_tokenizer: str = "Salesforce/SFR-Embedding-Mistral",
|
huggingface_tokenizer: str = "Salesforce/SFR-Embedding-Mistral",
|
||||||
):
|
):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.dimensions = dimensions
|
self.dimensions = dimensions
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
self.endpoint = endpoint
|
self.endpoint = endpoint
|
||||||
self.huggingface_tokenizer_name = huggingface_tokenizer
|
self.huggingface_tokenizer_name = huggingface_tokenizer
|
||||||
self.tokenizer = self.get_tokenizer()
|
self.tokenizer = self.get_tokenizer()
|
||||||
|
|
@ -132,7 +132,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
|
||||||
"""
|
"""
|
||||||
logger.debug("Loading HuggingfaceTokenizer for OllamaEmbeddingEngine...")
|
logger.debug("Loading HuggingfaceTokenizer for OllamaEmbeddingEngine...")
|
||||||
tokenizer = HuggingFaceTokenizer(
|
tokenizer = HuggingFaceTokenizer(
|
||||||
model=self.huggingface_tokenizer_name, max_tokens=self.max_tokens
|
model=self.huggingface_tokenizer_name, max_completion_tokens=self.max_completion_tokens
|
||||||
)
|
)
|
||||||
logger.debug("Tokenizer loaded for OllamaEmbeddingEngine")
|
logger.debug("Tokenizer loaded for OllamaEmbeddingEngine")
|
||||||
return tokenizer
|
return tokenizer
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ class EmbeddingConfig(BaseSettings):
|
||||||
embedding_endpoint: Optional[str] = None
|
embedding_endpoint: Optional[str] = None
|
||||||
embedding_api_key: Optional[str] = None
|
embedding_api_key: Optional[str] = None
|
||||||
embedding_api_version: Optional[str] = None
|
embedding_api_version: Optional[str] = None
|
||||||
embedding_max_tokens: Optional[int] = 8191
|
embedding_max_completion_tokens: Optional[int] = 8191
|
||||||
huggingface_tokenizer: Optional[str] = None
|
huggingface_tokenizer: Optional[str] = None
|
||||||
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
||||||
|
|
||||||
|
|
@ -38,7 +38,7 @@ class EmbeddingConfig(BaseSettings):
|
||||||
"embedding_endpoint": self.embedding_endpoint,
|
"embedding_endpoint": self.embedding_endpoint,
|
||||||
"embedding_api_key": self.embedding_api_key,
|
"embedding_api_key": self.embedding_api_key,
|
||||||
"embedding_api_version": self.embedding_api_version,
|
"embedding_api_version": self.embedding_api_version,
|
||||||
"embedding_max_tokens": self.embedding_max_tokens,
|
"embedding_max_completion_tokens": self.embedding_max_completion_tokens,
|
||||||
"huggingface_tokenizer": self.huggingface_tokenizer,
|
"huggingface_tokenizer": self.huggingface_tokenizer,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ def get_embedding_engine() -> EmbeddingEngine:
|
||||||
config.embedding_provider,
|
config.embedding_provider,
|
||||||
config.embedding_model,
|
config.embedding_model,
|
||||||
config.embedding_dimensions,
|
config.embedding_dimensions,
|
||||||
config.embedding_max_tokens,
|
config.embedding_max_completion_tokens,
|
||||||
config.embedding_endpoint,
|
config.embedding_endpoint,
|
||||||
config.embedding_api_key,
|
config.embedding_api_key,
|
||||||
config.embedding_api_version,
|
config.embedding_api_version,
|
||||||
|
|
@ -41,7 +41,7 @@ def create_embedding_engine(
|
||||||
embedding_provider,
|
embedding_provider,
|
||||||
embedding_model,
|
embedding_model,
|
||||||
embedding_dimensions,
|
embedding_dimensions,
|
||||||
embedding_max_tokens,
|
embedding_max_completion_tokens,
|
||||||
embedding_endpoint,
|
embedding_endpoint,
|
||||||
embedding_api_key,
|
embedding_api_key,
|
||||||
embedding_api_version,
|
embedding_api_version,
|
||||||
|
|
@ -58,7 +58,7 @@ def create_embedding_engine(
|
||||||
'ollama', or another supported provider.
|
'ollama', or another supported provider.
|
||||||
- embedding_model: The model to be used for the embedding engine.
|
- embedding_model: The model to be used for the embedding engine.
|
||||||
- embedding_dimensions: The number of dimensions for the embeddings.
|
- embedding_dimensions: The number of dimensions for the embeddings.
|
||||||
- embedding_max_tokens: The maximum number of tokens for the embeddings.
|
- embedding_max_completion_tokens: The maximum number of tokens for the embeddings.
|
||||||
- embedding_endpoint: The endpoint for the embedding service, relevant for certain
|
- embedding_endpoint: The endpoint for the embedding service, relevant for certain
|
||||||
providers.
|
providers.
|
||||||
- embedding_api_key: API key to authenticate with the embedding service, if
|
- embedding_api_key: API key to authenticate with the embedding service, if
|
||||||
|
|
@ -81,7 +81,7 @@ def create_embedding_engine(
|
||||||
return FastembedEmbeddingEngine(
|
return FastembedEmbeddingEngine(
|
||||||
model=embedding_model,
|
model=embedding_model,
|
||||||
dimensions=embedding_dimensions,
|
dimensions=embedding_dimensions,
|
||||||
max_tokens=embedding_max_tokens,
|
max_completion_tokens=embedding_max_completion_tokens,
|
||||||
)
|
)
|
||||||
|
|
||||||
if embedding_provider == "ollama":
|
if embedding_provider == "ollama":
|
||||||
|
|
@ -90,7 +90,7 @@ def create_embedding_engine(
|
||||||
return OllamaEmbeddingEngine(
|
return OllamaEmbeddingEngine(
|
||||||
model=embedding_model,
|
model=embedding_model,
|
||||||
dimensions=embedding_dimensions,
|
dimensions=embedding_dimensions,
|
||||||
max_tokens=embedding_max_tokens,
|
max_completion_tokens=embedding_max_completion_tokens,
|
||||||
endpoint=embedding_endpoint,
|
endpoint=embedding_endpoint,
|
||||||
huggingface_tokenizer=huggingface_tokenizer,
|
huggingface_tokenizer=huggingface_tokenizer,
|
||||||
)
|
)
|
||||||
|
|
@ -104,5 +104,5 @@ def create_embedding_engine(
|
||||||
api_version=embedding_api_version,
|
api_version=embedding_api_version,
|
||||||
model=embedding_model,
|
model=embedding_model,
|
||||||
dimensions=embedding_dimensions,
|
dimensions=embedding_dimensions,
|
||||||
max_tokens=embedding_max_tokens,
|
max_completion_tokens=embedding_max_completion_tokens,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
from cognee.exceptions import CriticalError
|
from cognee.exceptions import CogneeValidationError
|
||||||
|
|
||||||
|
|
||||||
class CollectionNotFoundError(CriticalError):
|
class CollectionNotFoundError(CogneeValidationError):
|
||||||
"""
|
"""
|
||||||
Represents an error that occurs when a requested collection cannot be found.
|
Represents an error that occurs when a requested collection cannot be found.
|
||||||
|
|
||||||
This class extends the CriticalError to handle specific cases where a requested
|
This class extends the CogneeValidationError to handle specific cases where a requested
|
||||||
collection is unavailable. It can be initialized with a custom message and allows for
|
collection is unavailable. It can be initialized with a custom message and allows for
|
||||||
logging options including log level and whether to log the error.
|
logging options including log level and whether to log the error.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from pydantic import BaseModel
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints
|
from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
|
||||||
from cognee.infrastructure.engine import DataPoint
|
from cognee.infrastructure.engine import DataPoint
|
||||||
from cognee.infrastructure.engine.utils import parse_id
|
from cognee.infrastructure.engine.utils import parse_id
|
||||||
from cognee.infrastructure.files.storage import get_file_storage
|
from cognee.infrastructure.files.storage import get_file_storage
|
||||||
|
|
@ -228,7 +228,7 @@ class LanceDBAdapter(VectorDBInterface):
|
||||||
normalized: bool = True,
|
normalized: bool = True,
|
||||||
):
|
):
|
||||||
if query_text is None and query_vector is None:
|
if query_text is None and query_vector is None:
|
||||||
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
|
raise MissingQueryParameterError()
|
||||||
|
|
||||||
if query_text and not query_vector:
|
if query_text and not query_vector:
|
||||||
query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
|
query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from sqlalchemy.exc import ProgrammingError
|
||||||
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
||||||
from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError
|
from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.infrastructure.engine import DataPoint
|
from cognee.infrastructure.engine import DataPoint
|
||||||
from cognee.infrastructure.engine.utils import parse_id
|
from cognee.infrastructure.engine.utils import parse_id
|
||||||
|
|
@ -17,6 +17,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
|
||||||
from distributed.utils import override_distributed
|
from distributed.utils import override_distributed
|
||||||
from distributed.tasks.queued_add_data_points import queued_add_data_points
|
from distributed.tasks.queued_add_data_points import queued_add_data_points
|
||||||
|
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
|
||||||
|
|
||||||
from ...relational.ModelBase import Base
|
from ...relational.ModelBase import Base
|
||||||
from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
|
from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
|
||||||
|
|
@ -275,7 +276,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
|
||||||
return metadata.tables[collection_name]
|
return metadata.tables[collection_name]
|
||||||
else:
|
else:
|
||||||
raise CollectionNotFoundError(
|
raise CollectionNotFoundError(
|
||||||
f"Collection '{collection_name}' not found!", log_level="DEBUG"
|
f"Collection '{collection_name}' not found!",
|
||||||
)
|
)
|
||||||
|
|
||||||
async def retrieve(self, collection_name: str, data_point_ids: List[str]):
|
async def retrieve(self, collection_name: str, data_point_ids: List[str]):
|
||||||
|
|
@ -302,7 +303,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
|
||||||
with_vector: bool = False,
|
with_vector: bool = False,
|
||||||
) -> List[ScoredResult]:
|
) -> List[ScoredResult]:
|
||||||
if query_text is None and query_vector is None:
|
if query_text is None and query_vector is None:
|
||||||
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
|
raise MissingQueryParameterError()
|
||||||
|
|
||||||
if query_text and not query_vector:
|
if query_text and not query_vector:
|
||||||
query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
|
query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ class LLMConfig(BaseSettings):
|
||||||
- llm_api_version
|
- llm_api_version
|
||||||
- llm_temperature
|
- llm_temperature
|
||||||
- llm_streaming
|
- llm_streaming
|
||||||
- llm_max_tokens
|
- llm_max_completion_tokens
|
||||||
- transcription_model
|
- transcription_model
|
||||||
- graph_prompt_path
|
- graph_prompt_path
|
||||||
- llm_rate_limit_enabled
|
- llm_rate_limit_enabled
|
||||||
|
|
@ -35,16 +35,16 @@ class LLMConfig(BaseSettings):
|
||||||
|
|
||||||
structured_output_framework: str = "instructor"
|
structured_output_framework: str = "instructor"
|
||||||
llm_provider: str = "openai"
|
llm_provider: str = "openai"
|
||||||
llm_model: str = "gpt-4o-mini"
|
llm_model: str = "gpt-5-mini"
|
||||||
llm_endpoint: str = ""
|
llm_endpoint: str = ""
|
||||||
llm_api_key: Optional[str] = None
|
llm_api_key: Optional[str] = None
|
||||||
llm_api_version: Optional[str] = None
|
llm_api_version: Optional[str] = None
|
||||||
llm_temperature: float = 0.0
|
llm_temperature: float = 0.0
|
||||||
llm_streaming: bool = False
|
llm_streaming: bool = False
|
||||||
llm_max_tokens: int = 16384
|
llm_max_completion_tokens: int = 16384
|
||||||
|
|
||||||
baml_llm_provider: str = "openai"
|
baml_llm_provider: str = "openai"
|
||||||
baml_llm_model: str = "gpt-4o-mini"
|
baml_llm_model: str = "gpt-5-mini"
|
||||||
baml_llm_endpoint: str = ""
|
baml_llm_endpoint: str = ""
|
||||||
baml_llm_api_key: Optional[str] = None
|
baml_llm_api_key: Optional[str] = None
|
||||||
baml_llm_temperature: float = 0.0
|
baml_llm_temperature: float = 0.0
|
||||||
|
|
@ -171,7 +171,7 @@ class LLMConfig(BaseSettings):
|
||||||
"api_version": self.llm_api_version,
|
"api_version": self.llm_api_version,
|
||||||
"temperature": self.llm_temperature,
|
"temperature": self.llm_temperature,
|
||||||
"streaming": self.llm_streaming,
|
"streaming": self.llm_streaming,
|
||||||
"max_tokens": self.llm_max_tokens,
|
"max_completion_tokens": self.llm_max_completion_tokens,
|
||||||
"transcription_model": self.transcription_model,
|
"transcription_model": self.transcription_model,
|
||||||
"graph_prompt_path": self.graph_prompt_path,
|
"graph_prompt_path": self.graph_prompt_path,
|
||||||
"rate_limit_enabled": self.llm_rate_limit_enabled,
|
"rate_limit_enabled": self.llm_rate_limit_enabled,
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,33 @@
|
||||||
from cognee.exceptions.exceptions import CriticalError
|
from cognee.exceptions.exceptions import CogneeValidationError
|
||||||
|
|
||||||
|
|
||||||
class ContentPolicyFilterError(CriticalError):
|
class ContentPolicyFilterError(CogneeValidationError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class LLMAPIKeyNotSetError(CogneeValidationError):
|
||||||
|
"""
|
||||||
|
Raised when the LLM API key is not set in the configuration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, message: str = "LLM API key is not set."):
|
||||||
|
super().__init__(message=message, name="LLMAPIKeyNotSetError")
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedLLMProviderError(CogneeValidationError):
|
||||||
|
"""
|
||||||
|
Raised when an unsupported LLM provider is specified in the configuration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, provider: str):
|
||||||
|
message = f"Unsupported LLM provider: {provider}"
|
||||||
|
super().__init__(message=message, name="UnsupportedLLMProviderError")
|
||||||
|
|
||||||
|
|
||||||
|
class MissingSystemPromptPathError(CogneeValidationError):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
name: str = "MissingSystemPromptPathError",
|
||||||
|
):
|
||||||
|
message = "No system prompt path provided."
|
||||||
|
super().__init__(message, name)
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ from typing import Type
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
import instructor
|
import instructor
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
|
||||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
||||||
LLMInterface,
|
LLMInterface,
|
||||||
)
|
)
|
||||||
|
|
@ -23,7 +23,7 @@ class AnthropicAdapter(LLMInterface):
|
||||||
name = "Anthropic"
|
name = "Anthropic"
|
||||||
model: str
|
model: str
|
||||||
|
|
||||||
def __init__(self, max_tokens: int, model: str = None):
|
def __init__(self, max_completion_tokens: int, model: str = None):
|
||||||
import anthropic
|
import anthropic
|
||||||
|
|
||||||
self.aclient = instructor.patch(
|
self.aclient = instructor.patch(
|
||||||
|
|
@ -31,7 +31,7 @@ class AnthropicAdapter(LLMInterface):
|
||||||
)
|
)
|
||||||
|
|
||||||
self.model = model
|
self.model = model
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
|
|
||||||
@sleep_and_retry_async()
|
@sleep_and_retry_async()
|
||||||
@rate_limit_async
|
@rate_limit_async
|
||||||
|
|
@ -57,7 +57,7 @@ class AnthropicAdapter(LLMInterface):
|
||||||
|
|
||||||
return await self.aclient(
|
return await self.aclient(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
max_tokens=4096,
|
max_completion_tokens=4096,
|
||||||
max_retries=5,
|
max_retries=5,
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
|
|
@ -89,7 +89,7 @@ class AnthropicAdapter(LLMInterface):
|
||||||
if not text_input:
|
if not text_input:
|
||||||
text_input = "No user input provided."
|
text_input = "No user input provided."
|
||||||
if not system_prompt:
|
if not system_prompt:
|
||||||
raise InvalidValueError(message="No system prompt path provided.")
|
raise MissingSystemPromptPathError()
|
||||||
|
|
||||||
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from litellm import acompletion, JSONSchemaValidationError
|
||||||
|
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.modules.observability.get_observe import get_observe
|
from cognee.modules.observability.get_observe import get_observe
|
||||||
from cognee.exceptions import InvalidValueError
|
from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
|
||||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
||||||
LLMInterface,
|
LLMInterface,
|
||||||
)
|
)
|
||||||
|
|
@ -34,7 +34,7 @@ class GeminiAdapter(LLMInterface):
|
||||||
self,
|
self,
|
||||||
api_key: str,
|
api_key: str,
|
||||||
model: str,
|
model: str,
|
||||||
max_tokens: int,
|
max_completion_tokens: int,
|
||||||
endpoint: Optional[str] = None,
|
endpoint: Optional[str] = None,
|
||||||
api_version: Optional[str] = None,
|
api_version: Optional[str] = None,
|
||||||
streaming: bool = False,
|
streaming: bool = False,
|
||||||
|
|
@ -44,7 +44,7 @@ class GeminiAdapter(LLMInterface):
|
||||||
self.endpoint = endpoint
|
self.endpoint = endpoint
|
||||||
self.api_version = api_version
|
self.api_version = api_version
|
||||||
self.streaming = streaming
|
self.streaming = streaming
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
|
|
||||||
@observe(as_type="generation")
|
@observe(as_type="generation")
|
||||||
@sleep_and_retry_async()
|
@sleep_and_retry_async()
|
||||||
|
|
@ -90,7 +90,7 @@ class GeminiAdapter(LLMInterface):
|
||||||
model=f"{self.model}",
|
model=f"{self.model}",
|
||||||
messages=messages,
|
messages=messages,
|
||||||
api_key=self.api_key,
|
api_key=self.api_key,
|
||||||
max_tokens=self.max_tokens,
|
max_completion_tokens=self.max_completion_tokens,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
response_format=response_schema,
|
response_format=response_schema,
|
||||||
timeout=100,
|
timeout=100,
|
||||||
|
|
@ -118,7 +118,7 @@ class GeminiAdapter(LLMInterface):
|
||||||
"""
|
"""
|
||||||
Format and display the prompt for a user query.
|
Format and display the prompt for a user query.
|
||||||
|
|
||||||
Raises an InvalidValueError if no system prompt is provided.
|
Raises a MissingSystemPromptPathError if no system prompt is provided.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
@ -135,7 +135,7 @@ class GeminiAdapter(LLMInterface):
|
||||||
if not text_input:
|
if not text_input:
|
||||||
text_input = "No user input provided."
|
text_input = "No user input provided."
|
||||||
if not system_prompt:
|
if not system_prompt:
|
||||||
raise InvalidValueError(message="No system prompt path provided.")
|
raise MissingSystemPromptPathError()
|
||||||
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
||||||
|
|
||||||
formatted_prompt = (
|
formatted_prompt = (
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ class GenericAPIAdapter(LLMInterface):
|
||||||
api_key: str,
|
api_key: str,
|
||||||
model: str,
|
model: str,
|
||||||
name: str,
|
name: str,
|
||||||
max_tokens: int,
|
max_completion_tokens: int,
|
||||||
fallback_model: str = None,
|
fallback_model: str = None,
|
||||||
fallback_api_key: str = None,
|
fallback_api_key: str = None,
|
||||||
fallback_endpoint: str = None,
|
fallback_endpoint: str = None,
|
||||||
|
|
@ -50,7 +50,7 @@ class GenericAPIAdapter(LLMInterface):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
self.endpoint = endpoint
|
self.endpoint = endpoint
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
|
|
||||||
self.fallback_model = fallback_model
|
self.fallback_model = fallback_model
|
||||||
self.fallback_api_key = fallback_api_key
|
self.fallback_api_key = fallback_api_key
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,14 @@
|
||||||
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
|
||||||
from cognee.infrastructure.llm import get_llm_config
|
from cognee.infrastructure.llm import get_llm_config
|
||||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import (
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import (
|
||||||
OllamaAPIAdapter,
|
OllamaAPIAdapter,
|
||||||
)
|
)
|
||||||
|
from cognee.infrastructure.llm.exceptions import (
|
||||||
|
LLMAPIKeyNotSetError,
|
||||||
|
UnsupportedLLMProviderError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Define an Enum for LLM Providers
|
# Define an Enum for LLM Providers
|
||||||
|
|
@ -35,7 +38,7 @@ def get_llm_client():
|
||||||
|
|
||||||
This function retrieves the configuration for the LLM provider and model, and
|
This function retrieves the configuration for the LLM provider and model, and
|
||||||
initializes the appropriate LLM client adapter accordingly. It raises an
|
initializes the appropriate LLM client adapter accordingly. It raises an
|
||||||
InvalidValueError if the LLM API key is not set for certain providers or if the provider
|
LLMAPIKeyNotSetError if the LLM API key is not set for certain providers or if the provider
|
||||||
is unsupported.
|
is unsupported.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
@ -51,15 +54,19 @@ def get_llm_client():
|
||||||
# Check if max_token value is defined in liteLLM for given model
|
# Check if max_token value is defined in liteLLM for given model
|
||||||
# if not use value from cognee configuration
|
# if not use value from cognee configuration
|
||||||
from cognee.infrastructure.llm.utils import (
|
from cognee.infrastructure.llm.utils import (
|
||||||
get_model_max_tokens,
|
get_model_max_completion_tokens,
|
||||||
) # imported here to avoid circular imports
|
) # imported here to avoid circular imports
|
||||||
|
|
||||||
model_max_tokens = get_model_max_tokens(llm_config.llm_model)
|
model_max_completion_tokens = get_model_max_completion_tokens(llm_config.llm_model)
|
||||||
max_tokens = model_max_tokens if model_max_tokens else llm_config.llm_max_tokens
|
max_completion_tokens = (
|
||||||
|
model_max_completion_tokens
|
||||||
|
if model_max_completion_tokens
|
||||||
|
else llm_config.llm_max_completion_tokens
|
||||||
|
)
|
||||||
|
|
||||||
if provider == LLMProvider.OPENAI:
|
if provider == LLMProvider.OPENAI:
|
||||||
if llm_config.llm_api_key is None:
|
if llm_config.llm_api_key is None:
|
||||||
raise InvalidValueError(message="LLM API key is not set.")
|
raise LLMAPIKeyNotSetError()
|
||||||
|
|
||||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import (
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import (
|
||||||
OpenAIAdapter,
|
OpenAIAdapter,
|
||||||
|
|
@ -71,7 +78,7 @@ def get_llm_client():
|
||||||
api_version=llm_config.llm_api_version,
|
api_version=llm_config.llm_api_version,
|
||||||
model=llm_config.llm_model,
|
model=llm_config.llm_model,
|
||||||
transcription_model=llm_config.transcription_model,
|
transcription_model=llm_config.transcription_model,
|
||||||
max_tokens=max_tokens,
|
max_completion_tokens=max_completion_tokens,
|
||||||
streaming=llm_config.llm_streaming,
|
streaming=llm_config.llm_streaming,
|
||||||
fallback_api_key=llm_config.fallback_api_key,
|
fallback_api_key=llm_config.fallback_api_key,
|
||||||
fallback_endpoint=llm_config.fallback_endpoint,
|
fallback_endpoint=llm_config.fallback_endpoint,
|
||||||
|
|
@ -80,7 +87,7 @@ def get_llm_client():
|
||||||
|
|
||||||
elif provider == LLMProvider.OLLAMA:
|
elif provider == LLMProvider.OLLAMA:
|
||||||
if llm_config.llm_api_key is None:
|
if llm_config.llm_api_key is None:
|
||||||
raise InvalidValueError(message="LLM API key is not set.")
|
raise LLMAPIKeyNotSetError()
|
||||||
|
|
||||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
||||||
GenericAPIAdapter,
|
GenericAPIAdapter,
|
||||||
|
|
@ -91,7 +98,7 @@ def get_llm_client():
|
||||||
llm_config.llm_api_key,
|
llm_config.llm_api_key,
|
||||||
llm_config.llm_model,
|
llm_config.llm_model,
|
||||||
"Ollama",
|
"Ollama",
|
||||||
max_tokens=max_tokens,
|
max_completion_tokens=max_completion_tokens,
|
||||||
)
|
)
|
||||||
|
|
||||||
elif provider == LLMProvider.ANTHROPIC:
|
elif provider == LLMProvider.ANTHROPIC:
|
||||||
|
|
@ -99,11 +106,13 @@ def get_llm_client():
|
||||||
AnthropicAdapter,
|
AnthropicAdapter,
|
||||||
)
|
)
|
||||||
|
|
||||||
return AnthropicAdapter(max_tokens=max_tokens, model=llm_config.llm_model)
|
return AnthropicAdapter(
|
||||||
|
max_completion_tokens=max_completion_tokens, model=llm_config.llm_model
|
||||||
|
)
|
||||||
|
|
||||||
elif provider == LLMProvider.CUSTOM:
|
elif provider == LLMProvider.CUSTOM:
|
||||||
if llm_config.llm_api_key is None:
|
if llm_config.llm_api_key is None:
|
||||||
raise InvalidValueError(message="LLM API key is not set.")
|
raise LLMAPIKeyNotSetError()
|
||||||
|
|
||||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
||||||
GenericAPIAdapter,
|
GenericAPIAdapter,
|
||||||
|
|
@ -114,7 +123,7 @@ def get_llm_client():
|
||||||
llm_config.llm_api_key,
|
llm_config.llm_api_key,
|
||||||
llm_config.llm_model,
|
llm_config.llm_model,
|
||||||
"Custom",
|
"Custom",
|
||||||
max_tokens=max_tokens,
|
max_completion_tokens=max_completion_tokens,
|
||||||
fallback_api_key=llm_config.fallback_api_key,
|
fallback_api_key=llm_config.fallback_api_key,
|
||||||
fallback_endpoint=llm_config.fallback_endpoint,
|
fallback_endpoint=llm_config.fallback_endpoint,
|
||||||
fallback_model=llm_config.fallback_model,
|
fallback_model=llm_config.fallback_model,
|
||||||
|
|
@ -122,7 +131,7 @@ def get_llm_client():
|
||||||
|
|
||||||
elif provider == LLMProvider.GEMINI:
|
elif provider == LLMProvider.GEMINI:
|
||||||
if llm_config.llm_api_key is None:
|
if llm_config.llm_api_key is None:
|
||||||
raise InvalidValueError(message="LLM API key is not set.")
|
raise LLMAPIKeyNotSetError()
|
||||||
|
|
||||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import (
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import (
|
||||||
GeminiAdapter,
|
GeminiAdapter,
|
||||||
|
|
@ -131,11 +140,11 @@ def get_llm_client():
|
||||||
return GeminiAdapter(
|
return GeminiAdapter(
|
||||||
api_key=llm_config.llm_api_key,
|
api_key=llm_config.llm_api_key,
|
||||||
model=llm_config.llm_model,
|
model=llm_config.llm_model,
|
||||||
max_tokens=max_tokens,
|
max_completion_tokens=max_completion_tokens,
|
||||||
endpoint=llm_config.llm_endpoint,
|
endpoint=llm_config.llm_endpoint,
|
||||||
api_version=llm_config.llm_api_version,
|
api_version=llm_config.llm_api_version,
|
||||||
streaming=llm_config.llm_streaming,
|
streaming=llm_config.llm_streaming,
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise InvalidValueError(message=f"Unsupported LLM provider: {provider}")
|
raise UnsupportedLLMProviderError(provider)
|
||||||
|
|
|
||||||
|
|
@ -30,16 +30,18 @@ class OllamaAPIAdapter(LLMInterface):
|
||||||
- model
|
- model
|
||||||
- api_key
|
- api_key
|
||||||
- endpoint
|
- endpoint
|
||||||
- max_tokens
|
- max_completion_tokens
|
||||||
- aclient
|
- aclient
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, endpoint: str, api_key: str, model: str, name: str, max_tokens: int):
|
def __init__(
|
||||||
|
self, endpoint: str, api_key: str, model: str, name: str, max_completion_tokens: int
|
||||||
|
):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.model = model
|
self.model = model
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
self.endpoint = endpoint
|
self.endpoint = endpoint
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
|
|
||||||
self.aclient = instructor.from_openai(
|
self.aclient = instructor.from_openai(
|
||||||
OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON
|
OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON
|
||||||
|
|
@ -159,7 +161,7 @@ class OllamaAPIAdapter(LLMInterface):
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
max_tokens=300,
|
max_completion_tokens=300,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Ensure response is valid before accessing .choices[0].message.content
|
# Ensure response is valid before accessing .choices[0].message.content
|
||||||
|
|
|
||||||
|
|
@ -7,12 +7,14 @@ from openai import ContentFilterFinishReasonError
|
||||||
from litellm.exceptions import ContentPolicyViolationError
|
from litellm.exceptions import ContentPolicyViolationError
|
||||||
from instructor.exceptions import InstructorRetryException
|
from instructor.exceptions import InstructorRetryException
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
|
||||||
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
||||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
||||||
LLMInterface,
|
LLMInterface,
|
||||||
)
|
)
|
||||||
from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
|
from cognee.infrastructure.llm.exceptions import (
|
||||||
|
ContentPolicyFilterError,
|
||||||
|
MissingSystemPromptPathError,
|
||||||
|
)
|
||||||
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
|
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
|
||||||
rate_limit_async,
|
rate_limit_async,
|
||||||
|
|
@ -62,7 +64,7 @@ class OpenAIAdapter(LLMInterface):
|
||||||
api_version: str,
|
api_version: str,
|
||||||
model: str,
|
model: str,
|
||||||
transcription_model: str,
|
transcription_model: str,
|
||||||
max_tokens: int,
|
max_completion_tokens: int,
|
||||||
streaming: bool = False,
|
streaming: bool = False,
|
||||||
fallback_model: str = None,
|
fallback_model: str = None,
|
||||||
fallback_api_key: str = None,
|
fallback_api_key: str = None,
|
||||||
|
|
@ -75,7 +77,7 @@ class OpenAIAdapter(LLMInterface):
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
self.endpoint = endpoint
|
self.endpoint = endpoint
|
||||||
self.api_version = api_version
|
self.api_version = api_version
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
self.streaming = streaming
|
self.streaming = streaming
|
||||||
|
|
||||||
self.fallback_model = fallback_model
|
self.fallback_model = fallback_model
|
||||||
|
|
@ -299,7 +301,7 @@ class OpenAIAdapter(LLMInterface):
|
||||||
api_key=self.api_key,
|
api_key=self.api_key,
|
||||||
api_base=self.endpoint,
|
api_base=self.endpoint,
|
||||||
api_version=self.api_version,
|
api_version=self.api_version,
|
||||||
max_tokens=300,
|
max_completion_tokens=300,
|
||||||
max_retries=self.MAX_RETRIES,
|
max_retries=self.MAX_RETRIES,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -308,7 +310,7 @@ class OpenAIAdapter(LLMInterface):
|
||||||
Format and display the prompt for a user query.
|
Format and display the prompt for a user query.
|
||||||
|
|
||||||
This method formats the prompt using the provided user input and system prompt,
|
This method formats the prompt using the provided user input and system prompt,
|
||||||
returning a string representation. Raises InvalidValueError if the system prompt is not
|
returning a string representation. Raises MissingSystemPromptPathError if the system prompt is not
|
||||||
provided.
|
provided.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
|
|
@ -325,7 +327,7 @@ class OpenAIAdapter(LLMInterface):
|
||||||
if not text_input:
|
if not text_input:
|
||||||
text_input = "No user input provided."
|
text_input = "No user input provided."
|
||||||
if not system_prompt:
|
if not system_prompt:
|
||||||
raise InvalidValueError(message="No system prompt path provided.")
|
raise MissingSystemPromptPathError()
|
||||||
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
system_prompt = LLMGateway.read_query_prompt(system_prompt)
|
||||||
|
|
||||||
formatted_prompt = (
|
formatted_prompt = (
|
||||||
|
|
|
||||||
|
|
@ -17,10 +17,10 @@ class GeminiTokenizer(TokenizerInterface):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
model: str,
|
model: str,
|
||||||
max_tokens: int = 3072,
|
max_completion_tokens: int = 3072,
|
||||||
):
|
):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
|
|
||||||
# Get LLM API key from config
|
# Get LLM API key from config
|
||||||
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
|
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
|
||||||
|
|
|
||||||
|
|
@ -14,17 +14,17 @@ class HuggingFaceTokenizer(TokenizerInterface):
|
||||||
|
|
||||||
Instance variables include:
|
Instance variables include:
|
||||||
- model: str
|
- model: str
|
||||||
- max_tokens: int
|
- max_completion_tokens: int
|
||||||
- tokenizer: AutoTokenizer
|
- tokenizer: AutoTokenizer
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
model: str,
|
model: str,
|
||||||
max_tokens: int = 512,
|
max_completion_tokens: int = 512,
|
||||||
):
|
):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
|
|
||||||
# Import here to make it an optional dependency
|
# Import here to make it an optional dependency
|
||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
|
|
|
||||||
|
|
@ -16,17 +16,17 @@ class MistralTokenizer(TokenizerInterface):
|
||||||
|
|
||||||
Instance variables include:
|
Instance variables include:
|
||||||
- model: str
|
- model: str
|
||||||
- max_tokens: int
|
- max_completion_tokens: int
|
||||||
- tokenizer: MistralTokenizer
|
- tokenizer: MistralTokenizer
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
model: str,
|
model: str,
|
||||||
max_tokens: int = 3072,
|
max_completion_tokens: int = 3072,
|
||||||
):
|
):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
|
|
||||||
# Import here to make it an optional dependency
|
# Import here to make it an optional dependency
|
||||||
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
|
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,10 @@ class TikTokenTokenizer(TokenizerInterface):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
model: Optional[str] = None,
|
model: Optional[str] = None,
|
||||||
max_tokens: int = 8191,
|
max_completion_tokens: int = 8191,
|
||||||
):
|
):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.max_tokens = max_tokens
|
self.max_completion_tokens = max_completion_tokens
|
||||||
# Initialize TikToken for GPT based on model
|
# Initialize TikToken for GPT based on model
|
||||||
if model:
|
if model:
|
||||||
self.tokenizer = tiktoken.encoding_for_model(self.model)
|
self.tokenizer = tiktoken.encoding_for_model(self.model)
|
||||||
|
|
@ -93,9 +93,9 @@ class TikTokenTokenizer(TokenizerInterface):
|
||||||
num_tokens = len(self.tokenizer.encode(text))
|
num_tokens = len(self.tokenizer.encode(text))
|
||||||
return num_tokens
|
return num_tokens
|
||||||
|
|
||||||
def trim_text_to_max_tokens(self, text: str) -> str:
|
def trim_text_to_max_completion_tokens(self, text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Trim the text so that the number of tokens does not exceed max_tokens.
|
Trim the text so that the number of tokens does not exceed max_completion_tokens.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
@ -111,13 +111,13 @@ class TikTokenTokenizer(TokenizerInterface):
|
||||||
num_tokens = self.count_tokens(text)
|
num_tokens = self.count_tokens(text)
|
||||||
|
|
||||||
# If the number of tokens is within the limit, return the text as is
|
# If the number of tokens is within the limit, return the text as is
|
||||||
if num_tokens <= self.max_tokens:
|
if num_tokens <= self.max_completion_tokens:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
# If the number exceeds the limit, trim the text
|
# If the number exceeds the limit, trim the text
|
||||||
# This is a simple trim, it may cut words in half; consider using word boundaries for a cleaner cut
|
# This is a simple trim, it may cut words in half; consider using word boundaries for a cleaner cut
|
||||||
encoded_text = self.tokenizer.encode(text)
|
encoded_text = self.tokenizer.encode(text)
|
||||||
trimmed_encoded_text = encoded_text[: self.max_tokens]
|
trimmed_encoded_text = encoded_text[: self.max_completion_tokens]
|
||||||
# Decoding the trimmed text
|
# Decoding the trimmed text
|
||||||
trimmed_text = self.tokenizer.decode(trimmed_encoded_text)
|
trimmed_text = self.tokenizer.decode(trimmed_encoded_text)
|
||||||
return trimmed_text
|
return trimmed_text
|
||||||
|
|
|
||||||
|
|
@ -32,13 +32,13 @@ def get_max_chunk_tokens():
|
||||||
|
|
||||||
# We need to make sure chunk size won't take more than half of LLM max context token size
|
# We need to make sure chunk size won't take more than half of LLM max context token size
|
||||||
# but it also can't be bigger than the embedding engine max token size
|
# but it also can't be bigger than the embedding engine max token size
|
||||||
llm_cutoff_point = llm_client.max_tokens // 2 # Round down the division
|
llm_cutoff_point = llm_client.max_completion_tokens // 2 # Round down the division
|
||||||
max_chunk_tokens = min(embedding_engine.max_tokens, llm_cutoff_point)
|
max_chunk_tokens = min(embedding_engine.max_completion_tokens, llm_cutoff_point)
|
||||||
|
|
||||||
return max_chunk_tokens
|
return max_chunk_tokens
|
||||||
|
|
||||||
|
|
||||||
def get_model_max_tokens(model_name: str):
|
def get_model_max_completion_tokens(model_name: str):
|
||||||
"""
|
"""
|
||||||
Retrieve the maximum token limit for a specified model name if it exists.
|
Retrieve the maximum token limit for a specified model name if it exists.
|
||||||
|
|
||||||
|
|
@ -56,15 +56,15 @@ def get_model_max_tokens(model_name: str):
|
||||||
|
|
||||||
Number of max tokens of model, or None if model is unknown
|
Number of max tokens of model, or None if model is unknown
|
||||||
"""
|
"""
|
||||||
max_tokens = None
|
max_completion_tokens = None
|
||||||
|
|
||||||
if model_name in litellm.model_cost:
|
if model_name in litellm.model_cost:
|
||||||
max_tokens = litellm.model_cost[model_name]["max_tokens"]
|
max_completion_tokens = litellm.model_cost[model_name]["max_tokens"]
|
||||||
logger.debug(f"Max input tokens for {model_name}: {max_tokens}")
|
logger.debug(f"Max input tokens for {model_name}: {max_completion_tokens}")
|
||||||
else:
|
else:
|
||||||
logger.info("Model not found in LiteLLM's model_cost.")
|
logger.info("Model not found in LiteLLM's model_cost.")
|
||||||
|
|
||||||
return max_tokens
|
return max_completion_tokens
|
||||||
|
|
||||||
|
|
||||||
async def test_llm_connection():
|
async def test_llm_connection():
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ class LoaderInterface(ABC):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def load(self, file_path: str, file_stream: Optional[Any] = None, **kwargs):
|
async def load(self, file_path: str, **kwargs):
|
||||||
"""
|
"""
|
||||||
Load and process the file, returning standardized result.
|
Load and process the file, returning standardized result.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,11 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import (
|
||||||
|
CogneeValidationError,
|
||||||
|
CogneeConfigurationError,
|
||||||
|
)
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class UnstructuredLibraryImportError(CogneeApiError):
|
class UnstructuredLibraryImportError(CogneeConfigurationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Import error. Unstructured library is not installed.",
|
message: str = "Import error. Unstructured library is not installed.",
|
||||||
|
|
@ -12,7 +15,7 @@ class UnstructuredLibraryImportError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class UnauthorizedDataAccessError(CogneeApiError):
|
class UnauthorizedDataAccessError(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "User does not have permission to access this data.",
|
message: str = "User does not have permission to access this data.",
|
||||||
|
|
@ -22,7 +25,7 @@ class UnauthorizedDataAccessError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class DatasetNotFoundError(CogneeApiError):
|
class DatasetNotFoundError(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Dataset not found.",
|
message: str = "Dataset not found.",
|
||||||
|
|
@ -32,7 +35,7 @@ class DatasetNotFoundError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class DatasetTypeError(CogneeApiError):
|
class DatasetTypeError(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Dataset type not supported.",
|
message: str = "Dataset type not supported.",
|
||||||
|
|
@ -40,3 +43,13 @@ class DatasetTypeError(CogneeApiError):
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
):
|
):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidTableAttributeError(CogneeValidationError):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
message: str = "The provided data object is missing the required '__tablename__' attribute.",
|
||||||
|
name: str = "InvalidTableAttributeError",
|
||||||
|
status_code: int = status.HTTP_400_BAD_REQUEST,
|
||||||
|
):
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from cognee.exceptions import InvalidAttributeError
|
from cognee.modules.data.exceptions.exceptions import InvalidTableAttributeError
|
||||||
from cognee.modules.data.models import Data
|
from cognee.modules.data.models import Data
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
|
||||||
|
|
@ -13,9 +13,7 @@ async def delete_data(data: Data):
|
||||||
ValueError: If the data object is invalid.
|
ValueError: If the data object is invalid.
|
||||||
"""
|
"""
|
||||||
if not hasattr(data, "__tablename__"):
|
if not hasattr(data, "__tablename__"):
|
||||||
raise InvalidAttributeError(
|
raise InvalidTableAttributeError()
|
||||||
message="The provided data object is missing the required '__tablename__' attribute."
|
|
||||||
)
|
|
||||||
|
|
||||||
db_engine = get_relational_engine()
|
db_engine = get_relational_engine()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeSystemError
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class PyPdfInternalError(CogneeApiError):
|
class PyPdfInternalError(CogneeSystemError):
|
||||||
"""Internal pypdf error"""
|
"""Internal pypdf error"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,11 @@ import time
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from typing import List, Dict, Union, Optional, Type
|
from typing import List, Dict, Union, Optional, Type
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
from cognee.modules.graph.exceptions import (
|
||||||
from cognee.modules.graph.exceptions import EntityNotFoundError, EntityAlreadyExistsError
|
EntityNotFoundError,
|
||||||
|
EntityAlreadyExistsError,
|
||||||
|
InvalidDimensionsError,
|
||||||
|
)
|
||||||
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
|
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
|
||||||
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
|
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
|
||||||
from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
|
from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
|
||||||
|
|
@ -66,8 +69,7 @@ class CogneeGraph(CogneeAbstractGraph):
|
||||||
node_name: Optional[List[str]] = None,
|
node_name: Optional[List[str]] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
if node_dimension < 1 or edge_dimension < 1:
|
if node_dimension < 1 or edge_dimension < 1:
|
||||||
raise InvalidValueError(message="Dimensions must be positive integers")
|
raise InvalidDimensionsError()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from typing import List, Dict, Optional, Any, Union
|
from typing import List, Dict, Optional, Any, Union
|
||||||
|
from cognee.modules.graph.exceptions import InvalidDimensionsError, DimensionOutOfRangeError
|
||||||
from cognee.exceptions import InvalidValueError
|
|
||||||
|
|
||||||
|
|
||||||
class Node:
|
class Node:
|
||||||
|
|
@ -24,7 +23,7 @@ class Node:
|
||||||
self, node_id: str, attributes: Optional[Dict[str, Any]] = None, dimension: int = 1
|
self, node_id: str, attributes: Optional[Dict[str, Any]] = None, dimension: int = 1
|
||||||
):
|
):
|
||||||
if dimension <= 0:
|
if dimension <= 0:
|
||||||
raise InvalidValueError(message="Dimension must be a positive integer")
|
raise InvalidDimensionsError()
|
||||||
self.id = node_id
|
self.id = node_id
|
||||||
self.attributes = attributes if attributes is not None else {}
|
self.attributes = attributes if attributes is not None else {}
|
||||||
self.attributes["vector_distance"] = float("inf")
|
self.attributes["vector_distance"] = float("inf")
|
||||||
|
|
@ -58,9 +57,7 @@ class Node:
|
||||||
|
|
||||||
def is_node_alive_in_dimension(self, dimension: int) -> bool:
|
def is_node_alive_in_dimension(self, dimension: int) -> bool:
|
||||||
if dimension < 0 or dimension >= len(self.status):
|
if dimension < 0 or dimension >= len(self.status):
|
||||||
raise InvalidValueError(
|
raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
|
||||||
message=f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}."
|
|
||||||
)
|
|
||||||
return self.status[dimension] == 1
|
return self.status[dimension] == 1
|
||||||
|
|
||||||
def add_attribute(self, key: str, value: Any) -> None:
|
def add_attribute(self, key: str, value: Any) -> None:
|
||||||
|
|
@ -110,7 +107,7 @@ class Edge:
|
||||||
dimension: int = 1,
|
dimension: int = 1,
|
||||||
):
|
):
|
||||||
if dimension <= 0:
|
if dimension <= 0:
|
||||||
raise InvalidValueError(message="Dimensions must be a positive integer.")
|
raise InvalidDimensionsError()
|
||||||
self.node1 = node1
|
self.node1 = node1
|
||||||
self.node2 = node2
|
self.node2 = node2
|
||||||
self.attributes = attributes if attributes is not None else {}
|
self.attributes = attributes if attributes is not None else {}
|
||||||
|
|
@ -120,9 +117,7 @@ class Edge:
|
||||||
|
|
||||||
def is_edge_alive_in_dimension(self, dimension: int) -> bool:
|
def is_edge_alive_in_dimension(self, dimension: int) -> bool:
|
||||||
if dimension < 0 or dimension >= len(self.status):
|
if dimension < 0 or dimension >= len(self.status):
|
||||||
raise InvalidValueError(
|
raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
|
||||||
message=f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}."
|
|
||||||
)
|
|
||||||
return self.status[dimension] == 1
|
return self.status[dimension] == 1
|
||||||
|
|
||||||
def add_attribute(self, key: str, value: Any) -> None:
|
def add_attribute(self, key: str, value: Any) -> None:
|
||||||
|
|
|
||||||
|
|
@ -7,4 +7,6 @@ This module defines a set of exceptions for handling various graph errors
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
EntityNotFoundError,
|
EntityNotFoundError,
|
||||||
EntityAlreadyExistsError,
|
EntityAlreadyExistsError,
|
||||||
|
InvalidDimensionsError,
|
||||||
|
DimensionOutOfRangeError,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeValidationError
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class EntityNotFoundError(CogneeApiError):
|
class EntityNotFoundError(CogneeValidationError):
|
||||||
"""Database returns nothing"""
|
"""Database returns nothing"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -14,7 +14,7 @@ class EntityNotFoundError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class EntityAlreadyExistsError(CogneeApiError):
|
class EntityAlreadyExistsError(CogneeValidationError):
|
||||||
"""Conflict detected, like trying to create a resource that already exists"""
|
"""Conflict detected, like trying to create a resource that already exists"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -24,3 +24,25 @@ class EntityAlreadyExistsError(CogneeApiError):
|
||||||
status_code=status.HTTP_409_CONFLICT,
|
status_code=status.HTTP_409_CONFLICT,
|
||||||
):
|
):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidDimensionsError(CogneeValidationError):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
name: str = "InvalidDimensionsError",
|
||||||
|
status_code: int = status.HTTP_400_BAD_REQUEST,
|
||||||
|
):
|
||||||
|
message = "Dimensions must be positive integers."
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
|
class DimensionOutOfRangeError(CogneeValidationError):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
dimension: int,
|
||||||
|
max_index: int,
|
||||||
|
name: str = "DimensionOutOfRangeError",
|
||||||
|
status_code: int = status.HTTP_400_BAD_REQUEST,
|
||||||
|
):
|
||||||
|
message = f"Dimension {dimension} is out of range. Valid range is 0 to {max_index}."
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeValidationError
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class IngestionError(CogneeApiError):
|
class IngestionError(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Type of data sent to classify not supported.",
|
message: str = "Type of data sent to classify not supported.",
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeSystemError
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class OntologyInitializationError(CogneeApiError):
|
class OntologyInitializationError(CogneeSystemError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Ontology initialization failed",
|
message: str = "Ontology initialization failed",
|
||||||
|
|
@ -12,7 +12,7 @@ class OntologyInitializationError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class FindClosestMatchError(CogneeApiError):
|
class FindClosestMatchError(CogneeSystemError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Error in find_closest_match",
|
message: str = "Error in find_closest_match",
|
||||||
|
|
@ -22,7 +22,7 @@ class FindClosestMatchError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class GetSubgraphError(CogneeApiError):
|
class GetSubgraphError(CogneeSystemError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Failed to retrieve subgraph",
|
message: str = "Failed to retrieve subgraph",
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeSystemError
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class PipelineRunFailedError(CogneeApiError):
|
class PipelineRunFailedError(CogneeSystemError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Pipeline run failed.",
|
message: str = "Pipeline run failed.",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter
|
|
||||||
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
||||||
from cognee.modules.retrieval.utils.completion import generate_completion
|
from cognee.modules.retrieval.utils.completion import generate_completion
|
||||||
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported, CypherSearchError
|
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported, CypherSearchError
|
||||||
|
|
@ -31,8 +30,7 @@ class CypherSearchRetriever(BaseRetriever):
|
||||||
"""
|
"""
|
||||||
Retrieves relevant context using a cypher query.
|
Retrieves relevant context using a cypher query.
|
||||||
|
|
||||||
If the graph engine is an instance of NetworkXAdapter, raises SearchTypeNotSupported. If
|
If any error occurs during execution, logs the error and raises CypherSearchError.
|
||||||
any error occurs during execution, logs the error and raises CypherSearchError.
|
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
@ -46,12 +44,6 @@ class CypherSearchRetriever(BaseRetriever):
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
graph_engine = await get_graph_engine()
|
graph_engine = await get_graph_engine()
|
||||||
|
|
||||||
if isinstance(graph_engine, NetworkXAdapter):
|
|
||||||
raise SearchTypeNotSupported(
|
|
||||||
"CYPHER search type not supported for NetworkXAdapter."
|
|
||||||
)
|
|
||||||
|
|
||||||
result = await graph_engine.query(query)
|
result = await graph_engine.query(query)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Failed to execture cypher search retrieval: %s", str(e))
|
logger.error("Failed to execture cypher search retrieval: %s", str(e))
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
from cognee.exceptions import CogneeApiError, CriticalError
|
from cognee.exceptions import CogneeValidationError, CogneeSystemError
|
||||||
|
|
||||||
|
|
||||||
class SearchTypeNotSupported(CogneeApiError):
|
class SearchTypeNotSupported(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "CYPHER search type not supported by the adapter.",
|
message: str = "CYPHER search type not supported by the adapter.",
|
||||||
|
|
@ -12,7 +12,7 @@ class SearchTypeNotSupported(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class CypherSearchError(CogneeApiError):
|
class CypherSearchError(CogneeSystemError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "An error occurred during the execution of the Cypher query.",
|
message: str = "An error occurred during the execution of the Cypher query.",
|
||||||
|
|
@ -22,11 +22,17 @@ class CypherSearchError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class NoDataError(CriticalError):
|
class NoDataError(CogneeValidationError):
|
||||||
message: str = "No data found in the system, please add data first."
|
def __init__(
|
||||||
|
self,
|
||||||
|
message: str = "No data found in the system, please add data first.",
|
||||||
|
name: str = "NoDataError",
|
||||||
|
status_code: int = status.HTTP_404_NOT_FOUND,
|
||||||
|
):
|
||||||
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class CollectionDistancesNotFoundError(CogneeApiError):
|
class CollectionDistancesNotFoundError(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "No collection distances found for the given query.",
|
message: str = "No collection distances found for the given query.",
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter
|
|
||||||
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
||||||
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
||||||
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
|
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
|
||||||
|
|
@ -123,9 +122,6 @@ class NaturalLanguageRetriever(BaseRetriever):
|
||||||
"""
|
"""
|
||||||
graph_engine = await get_graph_engine()
|
graph_engine = await get_graph_engine()
|
||||||
|
|
||||||
if isinstance(graph_engine, (NetworkXAdapter)):
|
|
||||||
raise SearchTypeNotSupported("Natural language search type not supported.")
|
|
||||||
|
|
||||||
return await self._execute_cypher_query(query, graph_engine)
|
return await self._execute_cypher_query(query, graph_engine)
|
||||||
|
|
||||||
async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
|
async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
|
||||||
|
|
|
||||||
7
cognee/modules/search/exceptions/__init__.py
Normal file
7
cognee/modules/search/exceptions/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
"""
|
||||||
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
|
This module defines a set of exceptions for handling various data errors
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .exceptions import UnsupportedSearchTypeError
|
||||||
15
cognee/modules/search/exceptions/exceptions.py
Normal file
15
cognee/modules/search/exceptions/exceptions.py
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
from cognee.exceptions import (
|
||||||
|
CogneeValidationError,
|
||||||
|
)
|
||||||
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedSearchTypeError(CogneeValidationError):
    """Validation error reporting that a requested search type is not supported."""

    def __init__(
        self,
        search_type: str,
        name: str = "UnsupportedSearchTypeError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ):
        """
        Build the error message from the offending search type.

        Parameters:
        -----------

            - search_type (str): The search type that was requested; interpolated
              into the error message.
            - name (str): Error name forwarded to the base class.
            - status_code (int): HTTP status code forwarded to the base class;
              defaults to 400 Bad Request.
        """
        super().__init__(f"Unsupported search type: {search_type}", name, status_code)
|
||||||
|
|
@ -3,9 +3,8 @@ import json
|
||||||
import asyncio
|
import asyncio
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from typing import Callable, List, Optional, Type, Union
|
from typing import Callable, List, Optional, Type, Union
|
||||||
|
from cognee.modules.search.exceptions import UnsupportedSearchTypeError
|
||||||
from cognee.context_global_variables import set_database_global_context_variables
|
from cognee.context_global_variables import set_database_global_context_variables
|
||||||
from cognee.exceptions import InvalidValueError
|
|
||||||
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
|
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
|
||||||
from cognee.modules.retrieval.insights_retriever import InsightsRetriever
|
from cognee.modules.retrieval.insights_retriever import InsightsRetriever
|
||||||
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
|
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
|
||||||
|
|
@ -143,7 +142,7 @@ async def specific_search(
|
||||||
search_task = search_tasks.get(query_type)
|
search_task = search_tasks.get(query_type)
|
||||||
|
|
||||||
if search_task is None:
|
if search_task is None:
|
||||||
raise InvalidValueError(message=f"Unsupported search type: {query_type}")
|
raise UnsupportedSearchTypeError(str(query_type))
|
||||||
|
|
||||||
send_telemetry("cognee.search EXECUTION STARTED", user.id)
|
send_telemetry("cognee.search EXECUTION STARTED", user.id)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -88,8 +88,8 @@ def get_settings() -> SettingsDict:
|
||||||
"models": {
|
"models": {
|
||||||
"openai": [
|
"openai": [
|
||||||
{
|
{
|
||||||
"value": "gpt-4o-mini",
|
"value": "gpt-5-mini",
|
||||||
"label": "gpt-4o-mini",
|
"label": "gpt-5-mini",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"value": "gpt-4o",
|
"value": "gpt-4o",
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeValidationError
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class RoleNotFoundError(CogneeApiError):
|
class RoleNotFoundError(CogneeValidationError):
|
||||||
"""User group not found"""
|
"""User group not found"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -14,7 +14,7 @@ class RoleNotFoundError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class TenantNotFoundError(CogneeApiError):
|
class TenantNotFoundError(CogneeValidationError):
|
||||||
"""User group not found"""
|
"""User group not found"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -26,7 +26,7 @@ class TenantNotFoundError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class UserNotFoundError(CogneeApiError):
|
class UserNotFoundError(CogneeValidationError):
|
||||||
"""User not found"""
|
"""User not found"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -38,7 +38,7 @@ class UserNotFoundError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class PermissionDeniedError(CogneeApiError):
|
class PermissionDeniedError(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "User does not have permission on documents.",
|
message: str = "User does not have permission on documents.",
|
||||||
|
|
@ -48,7 +48,7 @@ class PermissionDeniedError(CogneeApiError):
|
||||||
super().__init__(message, name, status_code)
|
super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
class PermissionNotFoundError(CogneeApiError):
|
class PermissionNotFoundError(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Permission type does not exist.",
|
message: str = "Permission type does not exist.",
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeValidationError
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class IngestionError(CogneeApiError):
|
class IngestionError(CogneeValidationError):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
message: str = "Failed to load data.",
|
message: str = "Failed to load data.",
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
import networkx as nx
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import http.server
|
import http.server
|
||||||
import socketserver
|
import socketserver
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeValidationError
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
class NoRelevantDataError(CogneeApiError):
|
class NoRelevantDataError(CogneeValidationError):
|
||||||
"""
|
"""
|
||||||
Represents an error when no relevant data is found during a search. This class is a
|
Represents an error when no relevant data is found during a search. This class is a
|
||||||
subclass of CogneeApiError.
|
subclass of CogneeValidationError.
|
||||||
|
|
||||||
Public methods:
|
Public methods:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ from cognee.modules.data.processing.document_types import (
|
||||||
)
|
)
|
||||||
from cognee.modules.engine.models.node_set import NodeSet
|
from cognee.modules.engine.models.node_set import NodeSet
|
||||||
from cognee.modules.engine.utils.generate_node_id import generate_node_id
|
from cognee.modules.engine.utils.generate_node_id import generate_node_id
|
||||||
|
from cognee.tasks.documents.exceptions import WrongDataDocumentInputError
|
||||||
|
|
||||||
EXTENSION_TO_DOCUMENT_CLASS = {
|
EXTENSION_TO_DOCUMENT_CLASS = {
|
||||||
"pdf": PdfDocument, # Text documents
|
"pdf": PdfDocument, # Text documents
|
||||||
|
|
@ -111,6 +112,9 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]:
|
||||||
- list[Document]: A list of Document objects created based on the classified data
|
- list[Document]: A list of Document objects created based on the classified data
|
||||||
documents.
|
documents.
|
||||||
"""
|
"""
|
||||||
|
if not isinstance(data_documents, list):
|
||||||
|
raise WrongDataDocumentInputError("data_documents")
|
||||||
|
|
||||||
documents = []
|
documents = []
|
||||||
for data_item in data_documents:
|
for data_item in data_documents:
|
||||||
document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](
|
document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](
|
||||||
|
|
|
||||||
11
cognee/tasks/documents/exceptions/__init__.py
Normal file
11
cognee/tasks/documents/exceptions/__init__.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
"""
|
||||||
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
|
This module defines a set of exceptions for handling various data errors
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .exceptions import (
|
||||||
|
WrongDataDocumentInputError,
|
||||||
|
InvalidChunkSizeError,
|
||||||
|
InvalidChunkerError,
|
||||||
|
)
|
||||||
36
cognee/tasks/documents/exceptions/exceptions.py
Normal file
36
cognee/tasks/documents/exceptions/exceptions.py
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
from cognee.exceptions import (
|
||||||
|
CogneeValidationError,
|
||||||
|
CogneeConfigurationError,
|
||||||
|
)
|
||||||
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
|
class WrongDataDocumentInputError(CogneeValidationError):
    """Raised when a wrong data document is provided."""

    def __init__(
        self,
        field: str,
        name: str = "WrongDataDocumentInputError",
        status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
    ):
        """
        Build the error message from the name of the offending parameter.

        Parameters:
        -----------

            - field (str): Name of the parameter that was missing or invalid;
              interpolated into the error message.
            - name (str): Error name forwarded to the base class.
            - status_code (int): HTTP status code forwarded to the base class;
              defaults to 422 Unprocessable Entity.
        """
        # Fixed wording: the message previously read "Missing of invalid parameter".
        message = f"Missing or invalid parameter: '{field}'."
        super().__init__(message, name, status_code)
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidChunkSizeError(CogneeValidationError):
    """Validation error reporting that max_chunk_size is not a positive integer."""

    def __init__(self, value):
        """
        Build the error message from the rejected value.

        Parameters:
        -----------

            - value: The rejected max_chunk_size value; interpolated into the
              error message.
        """
        message = f"max_chunk_size must be a positive integer (got {value})."
        super().__init__(
            message=message,
            name="InvalidChunkSizeError",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidChunkerError(CogneeValidationError):
    """Validation error reporting that the supplied chunker is not a valid Chunker class."""

    def __init__(self):
        """Initialize with a fixed message, name and a 400 Bad Request status code."""
        message = "chunker must be a valid Chunker class."
        super().__init__(
            message=message,
            name="InvalidChunkerError",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
|
||||||
|
|
@ -8,6 +8,7 @@ from cognee.modules.data.models import Data
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
from cognee.modules.chunking.TextChunker import TextChunker
|
from cognee.modules.chunking.TextChunker import TextChunker
|
||||||
from cognee.modules.chunking.Chunker import Chunker
|
from cognee.modules.chunking.Chunker import Chunker
|
||||||
|
from cognee.tasks.documents.exceptions import InvalidChunkSizeError, InvalidChunkerError
|
||||||
|
|
||||||
|
|
||||||
async def update_document_token_count(document_id: UUID, token_count: int) -> None:
|
async def update_document_token_count(document_id: UUID, token_count: int) -> None:
|
||||||
|
|
@ -37,6 +38,13 @@ async def extract_chunks_from_documents(
|
||||||
- The `read` method of the `Document` class must be implemented to support the chunking operation.
|
- The `read` method of the `Document` class must be implemented to support the chunking operation.
|
||||||
- The `chunker` parameter determines the chunking logic and should align with the document type.
|
- The `chunker` parameter determines the chunking logic and should align with the document type.
|
||||||
"""
|
"""
|
||||||
|
if not isinstance(max_chunk_size, int) or max_chunk_size <= 0:
|
||||||
|
raise InvalidChunkSizeError(max_chunk_size)
|
||||||
|
if not isinstance(chunker, type):
|
||||||
|
raise InvalidChunkerError()
|
||||||
|
if not hasattr(chunker, "read"):
|
||||||
|
raise InvalidChunkerError()
|
||||||
|
|
||||||
for document in documents:
|
for document in documents:
|
||||||
document_token_count = 0
|
document_token_count = 0
|
||||||
|
|
||||||
|
|
@ -48,5 +56,3 @@ async def extract_chunks_from_documents(
|
||||||
yield document_chunk
|
yield document_chunk
|
||||||
|
|
||||||
await update_document_token_count(document.id, document_token_count)
|
await update_document_token_count(document.id, document_token_count)
|
||||||
|
|
||||||
# todo rita
|
|
||||||
|
|
|
||||||
12
cognee/tasks/graph/exceptions/__init__.py
Normal file
12
cognee/tasks/graph/exceptions/__init__.py
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
"""
|
||||||
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
|
This module defines a set of exceptions for handling various data errors
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .exceptions import (
|
||||||
|
InvalidDataChunksError,
|
||||||
|
InvalidGraphModelError,
|
||||||
|
InvalidOntologyAdapterError,
|
||||||
|
InvalidChunkGraphInputError,
|
||||||
|
)
|
||||||
41
cognee/tasks/graph/exceptions/exceptions.py
Normal file
41
cognee/tasks/graph/exceptions/exceptions.py
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
from cognee.exceptions import (
|
||||||
|
CogneeValidationError,
|
||||||
|
CogneeConfigurationError,
|
||||||
|
)
|
||||||
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidDataChunksError(CogneeValidationError):
    """Validation error reporting that the data_chunks argument is invalid."""

    def __init__(self, detail: str):
        """
        Build the error message from a description of the problem.

        Parameters:
        -----------

            - detail (str): Explanation of what is wrong with data_chunks;
              appended to the "Invalid data_chunks: " prefix.
        """
        message = f"Invalid data_chunks: {detail}"
        super().__init__(
            message=message,
            name="InvalidDataChunksError",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidGraphModelError(CogneeValidationError):
    """Validation error reporting that graph_model is not a BaseModel subclass."""

    def __init__(self, got):
        """
        Build the error message from the rejected graph model.

        Parameters:
        -----------

            - got: The object that was supplied as graph_model; interpolated
              into the error message.
        """
        message = f"graph_model must be a subclass of BaseModel (got {got})."
        super().__init__(
            message=message,
            name="InvalidGraphModelError",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidOntologyAdapterError(CogneeConfigurationError):
    """Configuration error reporting that ontology_adapter lacks the required interface."""

    def __init__(self, got):
        """
        Build the error message from a description of the supplied adapter.

        Parameters:
        -----------

            - got: Description of what was supplied as ontology_adapter;
              interpolated into the error message.
        """
        message = f"ontology_adapter lacks required interface (got {got})."
        super().__init__(
            message=message,
            name="InvalidOntologyAdapterError",
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidChunkGraphInputError(CogneeValidationError):
    """Validation error reporting invalid chunk inputs or LLM chunk graphs."""

    def __init__(self, detail: str):
        """
        Build the error message from a description of the problem.

        Parameters:
        -----------

            - detail (str): Explanation of what is wrong with the inputs;
              appended to the "Invalid chunk inputs or LLM Chunkgraphs: " prefix.
        """
        message = f"Invalid chunk inputs or LLM Chunkgraphs: {detail}"
        super().__init__(
            message=message,
            name="InvalidChunkGraphInputError",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
|
||||||
|
|
@ -12,6 +12,12 @@ from cognee.modules.graph.utils import (
|
||||||
)
|
)
|
||||||
from cognee.shared.data_models import KnowledgeGraph
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
||||||
|
from cognee.tasks.graph.exceptions import (
|
||||||
|
InvalidGraphModelError,
|
||||||
|
InvalidDataChunksError,
|
||||||
|
InvalidChunkGraphInputError,
|
||||||
|
InvalidOntologyAdapterError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def integrate_chunk_graphs(
|
async def integrate_chunk_graphs(
|
||||||
|
|
@ -21,6 +27,20 @@ async def integrate_chunk_graphs(
|
||||||
ontology_adapter: OntologyResolver,
|
ontology_adapter: OntologyResolver,
|
||||||
) -> List[DocumentChunk]:
|
) -> List[DocumentChunk]:
|
||||||
"""Updates DocumentChunk objects, integrates data points and edges into databases."""
|
"""Updates DocumentChunk objects, integrates data points and edges into databases."""
|
||||||
|
|
||||||
|
if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
|
||||||
|
raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
|
||||||
|
if len(data_chunks) != len(chunk_graphs):
|
||||||
|
raise InvalidChunkGraphInputError(
|
||||||
|
f"length mismatch: {len(data_chunks)} chunks vs {len(chunk_graphs)} graphs."
|
||||||
|
)
|
||||||
|
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
|
||||||
|
raise InvalidGraphModelError(graph_model)
|
||||||
|
if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
|
||||||
|
raise InvalidOntologyAdapterError(
|
||||||
|
type(ontology_adapter).__name__ if ontology_adapter else "None"
|
||||||
|
)
|
||||||
|
|
||||||
graph_engine = await get_graph_engine()
|
graph_engine = await get_graph_engine()
|
||||||
|
|
||||||
if graph_model is not KnowledgeGraph:
|
if graph_model is not KnowledgeGraph:
|
||||||
|
|
@ -55,6 +75,14 @@ async def extract_graph_from_data(
|
||||||
"""
|
"""
|
||||||
Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
|
Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if not isinstance(data_chunks, list) or not data_chunks:
|
||||||
|
raise InvalidDataChunksError("must be a non-empty list of DocumentChunk.")
|
||||||
|
if not all(hasattr(c, "text") for c in data_chunks):
|
||||||
|
raise InvalidDataChunksError("each chunk must have a 'text' attribute")
|
||||||
|
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
|
||||||
|
raise InvalidGraphModelError(graph_model)
|
||||||
|
|
||||||
chunk_graphs = await asyncio.gather(
|
chunk_graphs = await asyncio.gather(
|
||||||
*[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
|
*[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
|
||||||
)
|
)
|
||||||
|
|
|
||||||
8
cognee/tasks/ingestion/exceptions/__init__.py
Normal file
8
cognee/tasks/ingestion/exceptions/__init__.py
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
"""
|
||||||
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
|
This module defines a set of exceptions for handling various application errors,
|
||||||
|
such as System, Validation, Configuration or TransientErrors
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .exceptions import S3FileSystemNotFoundError
|
||||||
12
cognee/tasks/ingestion/exceptions/exceptions.py
Normal file
12
cognee/tasks/ingestion/exceptions/exceptions.py
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
from cognee.exceptions import CogneeSystemError
|
||||||
|
from fastapi import status
|
||||||
|
|
||||||
|
|
||||||
|
class S3FileSystemNotFoundError(CogneeSystemError):
    """System error reporting that S3FileSystem could not be found."""

    def __init__(
        self,
        name: str = "S3FileSystemNotFoundError",
        status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
    ):
        """
        Initialize the error with a fixed message.

        Parameters:
        -----------

            - name (str): Error name forwarded to the base class.
            - status_code (int): HTTP status code forwarded to the base class;
              defaults to 500 Internal Server Error.
        """
        super().__init__("Could not find S3FileSystem.", name, status_code)
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue