Compare commits

2 commits · branches: feature/co… and main

| Author | SHA1 | Date |
|---|---|---|
|  | a505eef95b |  |
|  | 8cd3aab1ef |  |

375 changed files with 8884 additions and 30106 deletions
@@ -91,15 +91,6 @@ DB_NAME=cognee_db
 #DB_USERNAME=cognee
 #DB_PASSWORD=cognee

-# -- Advanced: Custom database connection arguments (optional) ---------------
-# Pass additional connection parameters as JSON. Useful for SSL, timeouts, etc.
-# Examples:
-# For PostgreSQL with SSL:
-# DATABASE_CONNECT_ARGS='{"sslmode": "require", "connect_timeout": 10}'
-# For SQLite with custom timeout:
-# DATABASE_CONNECT_ARGS='{"timeout": 60}'
-#DATABASE_CONNECT_ARGS='{}'
-
 ################################################################################
 # 🕸️ Graph Database settings
 ################################################################################
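The removed block documented passing extra driver options as a JSON string. For illustration, a minimal sketch of how such a value could be consumed, assuming it is forwarded to an SQLAlchemy-style engine as `connect_args` (the variable name comes from the template above; the forwarding is an assumption, not the confirmed implementation):

```python
import json
import os

from sqlalchemy import create_engine

# Assumption: DATABASE_CONNECT_ARGS holds a JSON object of driver options,
# forwarded verbatim to the engine. create_engine() does not connect eagerly.
connect_args = json.loads(os.environ.get("DATABASE_CONNECT_ARGS", "{}"))

engine = create_engine(
    "postgresql+psycopg2://cognee:cognee@localhost:5432/cognee_db",
    connect_args=connect_args,  # e.g. {"sslmode": "require", "connect_timeout": 10}
)
```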
.github/ISSUE_TEMPLATE/documentation.yml (vendored) — 1 changed line

@@ -71,3 +71,4 @@ body:
          required: true
      - label: I have specified the location of the documentation issue
        required: true
.github/ISSUE_TEMPLATE/feature_request.yml (vendored) — 1 changed line

@@ -75,3 +75,4 @@ body:
          required: true
      - label: I have described my specific use case
        required: true
.github/core-team.txt (vendored) — 2 changed lines

@@ -8,3 +8,5 @@ lxobr
pazone
siillee
vasilije1990
.github/pull_request_template.md (vendored) — 11 changed lines

@@ -10,21 +10,26 @@ DO NOT use AI-generated descriptions. We want to understand your thought process
 <!--
 * Key requirements to the new feature or modification;
 * Proof that the changes work and meet the requirements;
 * Include instructions on how to verify the changes. Describe how to test it locally;
 * Proof that it's sufficiently tested.
 -->

 ## Type of Change
 <!-- Please check the relevant option -->
 - [ ] Bug fix (non-breaking change that fixes an issue)
 - [ ] New feature (non-breaking change that adds functionality)
 - [ ] Breaking change (fix or feature that would cause existing functionality to change)
 - [ ] Documentation update
 - [ ] Code refactoring
 - [ ] Performance improvement
 - [ ] Other (please specify):

-## Screenshots
-<!-- ADD SCREENSHOT OF LOCAL TESTS PASSING-->
+## Screenshots/Videos (if applicable)
+<!-- Add screenshots or videos to help explain your changes -->

 ## Pre-submission Checklist
 <!-- Please check all boxes that apply before submitting your PR -->
-- [ ] **I have tested my changes thoroughly before submitting this PR** (See `CONTRIBUTING.md`)
+- [ ] **I have tested my changes thoroughly before submitting this PR**
 - [ ] **This PR contains minimal changes necessary to address the issue/feature**
 - [ ] My code follows the project's coding standards and style guidelines
 - [ ] I have added tests that prove my fix is effective or that my feature works
.github/workflows/basic_tests.yml (vendored) — 37 changed lines

@@ -34,6 +34,43 @@ env:
   ENV: 'dev'

 jobs:
+  lint:
+    name: Run Linting
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: ${{ inputs.python-version }}
+
+      - name: Run Linting
+        uses: astral-sh/ruff-action@v2
+
+  format-check:
+    name: Run Formatting Check
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: ${{ inputs.python-version }}
+
+      - name: Run Formatting Check
+        uses: astral-sh/ruff-action@v2
+        with:
+          args: "format --check"
+
   unit-tests:
     name: Run Unit Tests
     runs-on: ubuntu-22.04
.github/workflows/e2e_tests.yml (vendored) — 75 changed lines

@@ -288,7 +288,7 @@ jobs:
           EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
           EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-        run: uv run pytest cognee/tests/test_permissions.py -v --log-level=INFO
+        run: uv run python ./cognee/tests/test_permissions.py

   test-multi-tenancy:
     name: Test multi tenancy with different situations in Cognee

@@ -315,31 +315,6 @@ jobs:
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: uv run python ./cognee/tests/test_multi_tenancy.py

-  test-data-label:
-    name: Test adding of label for data in Cognee
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Check out repository
-        uses: actions/checkout@v4
-
-      - name: Cognee Setup
-        uses: ./.github/actions/cognee_setup
-        with:
-          python-version: '3.11.x'
-
-      - name: Run custom data label test
-        env:
-          ENV: 'dev'
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
-          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-        run: uv run python ./cognee/tests/test_custom_data_label.py
-
   test-graph-edges:
     name: Test graph edge ingestion
     runs-on: ubuntu-22.04

@@ -659,51 +634,3 @@ jobs:
           EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: uv run python ./cognee/tests/test_pipeline_cache.py
-
-  run_usage_logger_test:
-    name: Usage logger test (API/MCP)
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        shell: bash
-    services:
-      redis:
-        image: redis:7
-        ports:
-          - 6379:6379
-        options: >-
-          --health-cmd "redis-cli ping"
-          --health-interval 5s
-          --health-timeout 3s
-          --health-retries 5
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Cognee Setup
-        uses: ./.github/actions/cognee_setup
-        with:
-          python-version: '3.11.x'
-          extra-dependencies: "redis"
-
-      - name: Install cognee-mcp (local version)
-        shell: bash
-        run: |
-          uv pip install -e ./cognee-mcp
-
-      - name: Run api/tool usage logger
-        env:
-          ENV: dev
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
-          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-          GRAPH_DATABASE_PROVIDER: 'kuzu'
-          USAGE_LOGGING: true
-          CACHE_BACKEND: 'redis'
-        run: uv run pytest cognee/tests/test_usage_logger_e2e.py -v --log-level=INFO
.github/workflows/examples_tests.yml (vendored) — 61 changed lines

@@ -257,7 +257,7 @@ jobs:
         with:
           python-version: '3.11.x'

-      - name: Run Permissions Example
+      - name: Run Memify Tests
         env:
           ENV: 'dev'
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

@@ -270,65 +270,6 @@
           EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: uv run python ./examples/python/permissions_example.py

-  test-s3-permissions-example: # Make sure permission and multi-user mode work with S3 file system
-    name: Run Permissions Example
-    runs-on: ubuntu-22.04
-    defaults:
-      run:
-        shell: bash
-    services:
-      postgres: # Using postgres to avoid storing and using SQLite from S3
-        image: pgvector/pgvector:pg17
-        env:
-          POSTGRES_USER: cognee
-          POSTGRES_PASSWORD: cognee
-          POSTGRES_DB: cognee_db
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          - 5432:5432
-    steps:
-      - name: Check out repository
-        uses: actions/checkout@v4
-
-      - name: Cognee Setup
-        uses: ./.github/actions/cognee_setup
-        with:
-          python-version: '3.11.x'
-          extra-dependencies: "postgres aws"
-
-      - name: Run S3 Permissions Example
-        env:
-          ENV: 'dev'
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
-          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-          STORAGE_BACKEND: 's3'
-          AWS_REGION: eu-west-1
-          AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }}
-          STORAGE_BUCKET_NAME: github-runner-cognee-tests
-          DATA_ROOT_DIRECTORY: "s3://github-runner-cognee-tests/cognee/data"
-          SYSTEM_ROOT_DIRECTORY: "s3://github-runner-cognee-tests/cognee/system"
-          DB_PROVIDER: 'postgres'
-          DB_NAME: 'cognee_db'
-          DB_HOST: '127.0.0.1'
-          DB_PORT: 5432
-          DB_USERNAME: cognee
-          DB_PASSWORD: cognee
-        run: uv run python ./examples/python/permissions_example.py
-
   test_docling_add:
     name: Run Add with Docling Test
     runs-on: macos-15
.github/workflows/label-core-team.yml (vendored) — 2 changed lines

@@ -72,3 +72,5 @@ jobs:
            } catch (error) {
              core.warning(`Failed to add label: ${error.message}`);
            }
.github/workflows/load_tests.yml (vendored) — 2 changed lines

@@ -66,3 +66,5 @@ jobs:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }}
        run: uv run python ./cognee/tests/test_load.py
.github/workflows/pre_test.yml (vendored) — 5 changed lines

@@ -5,7 +5,7 @@ permissions:
   contents: read
 jobs:
   check-uv-lock:
-    name: Lockfile and Pre-commit Hooks
+    name: Validate uv lockfile and project metadata
     runs-on: ubuntu-22.04
     steps:
       - name: Check out repository

@@ -20,6 +20,3 @@ jobs:

       - name: Validate uv lockfile and project metadata
         run: uv lock --check || { echo "'uv lock --check' failed."; echo "Run 'uv lock' and push your changes."; exit 1; }
-
-      - name: Run pre-commit hooks
-        uses: pre-commit/action@v3.0.1
.github/workflows/release.yml (vendored) — 28 changed lines

@@ -136,31 +136,3 @@ jobs:
             flavour=${{ inputs.flavour }}
         cache-from: type=registry,ref=cognee/cognee:buildcache
         cache-to: type=registry,ref=cognee/cognee:buildcache,mode=max
-
-  trigger-docs-test-suite:
-    needs: release-pypi-package
-    if: ${{ inputs.flavour == 'main' }}
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Trigger docs tests
-        run: |
-          curl -L -X POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "Authorization: Bearer ${{ secrets.REPO_DISPATCH_PAT_TOKEN }}" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            https://api.github.com/repos/topoteretes/cognee-docs/dispatches \
-            -d '{"event_type":"new-main-release","client_payload":{"caller_repo":"'"${GITHUB_REPOSITORY}"'"}}'
-
-  trigger-community-test-suite:
-    needs: release-pypi-package
-    if: ${{ inputs.flavour == 'main' }}
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Trigger community tests
-        run: |
-          curl -L -X POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "Authorization: Bearer ${{ secrets.REPO_DISPATCH_PAT_TOKEN }}" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            https://api.github.com/repos/topoteretes/cognee-community/dispatches \
-            -d '{"event_type":"new-main-release","client_payload":{"caller_repo":"'"${GITHUB_REPOSITORY}"'"}}'
.github/workflows/search_db_tests.yml (vendored) — 46 changed lines

@@ -11,21 +11,12 @@ on:
         type: string
         default: "all"
         description: "Which vector databases to test (comma-separated list or 'all')"
-      python-versions:
-        required: false
-        type: string
-        default: '["3.10", "3.11", "3.12", "3.13"]'
-        description: "Python versions to test (JSON array)"

 jobs:
   run-kuzu-lance-sqlite-search-tests:
-    name: Search test for Kuzu/LanceDB/Sqlite (Python ${{ matrix.python-version }})
+    name: Search test for Kuzu/LanceDB/Sqlite
     runs-on: ubuntu-22.04
     if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'kuzu/lance/sqlite') }}
-    strategy:
-      matrix:
-        python-version: ${{ fromJSON(inputs.python-versions) }}
-      fail-fast: false
     steps:
       - name: Check out
         uses: actions/checkout@v4

@@ -35,7 +26,7 @@ jobs:
       - name: Cognee Setup
         uses: ./.github/actions/cognee_setup
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: ${{ inputs.python-version }}

       - name: Dependencies already installed
         run: echo "Dependencies already installed in setup"

@@ -54,16 +45,13 @@ jobs:
           GRAPH_DATABASE_PROVIDER: 'kuzu'
           VECTOR_DB_PROVIDER: 'lancedb'
           DB_PROVIDER: 'sqlite'
-        run: uv run pytest cognee/tests/test_search_db.py -v --log-level=INFO
+        run: uv run python ./cognee/tests/test_search_db.py

   run-neo4j-lance-sqlite-search-tests:
-    name: Search test for Neo4j/LanceDB/Sqlite (Python ${{ matrix.python-version }})
+    name: Search test for Neo4j/LanceDB/Sqlite
     runs-on: ubuntu-22.04
     if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j/lance/sqlite') }}
-    strategy:
-      matrix:
-        python-version: ${{ fromJSON(inputs.python-versions) }}
-      fail-fast: false

     steps:
       - name: Check out
         uses: actions/checkout@v4

@@ -73,7 +61,7 @@ jobs:
       - name: Cognee Setup
         uses: ./.github/actions/cognee_setup
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: ${{ inputs.python-version }}

       - name: Setup Neo4j with GDS
         uses: ./.github/actions/setup_neo4j

@@ -100,16 +88,12 @@ jobs:
           GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
           GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
           GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
-        run: uv run pytest cognee/tests/test_search_db.py -v --log-level=INFO
+        run: uv run python ./cognee/tests/test_search_db.py

   run-kuzu-pgvector-postgres-search-tests:
-    name: Search test for Kuzu/PGVector/Postgres (Python ${{ matrix.python-version }})
+    name: Search test for Kuzu/PGVector/Postgres
     runs-on: ubuntu-22.04
     if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'kuzu/pgvector/postgres') }}
-    strategy:
-      matrix:
-        python-version: ${{ fromJSON(inputs.python-versions) }}
-      fail-fast: false
     services:
       postgres:
         image: pgvector/pgvector:pg17

@@ -133,7 +117,7 @@ jobs:
       - name: Cognee Setup
         uses: ./.github/actions/cognee_setup
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: ${{ inputs.python-version }}
           extra-dependencies: "postgres"

       - name: Dependencies already installed

@@ -159,16 +143,12 @@ jobs:
           DB_PORT: 5432
           DB_USERNAME: cognee
           DB_PASSWORD: cognee
-        run: uv run pytest cognee/tests/test_search_db.py -v --log-level=INFO
+        run: uv run python ./cognee/tests/test_search_db.py

   run-neo4j-pgvector-postgres-search-tests:
-    name: Search test for Neo4j/PGVector/Postgres (Python ${{ matrix.python-version }})
+    name: Search test for Neo4j/PGVector/Postgres
     runs-on: ubuntu-22.04
     if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j/pgvector/postgres') }}
-    strategy:
-      matrix:
-        python-version: ${{ fromJSON(inputs.python-versions) }}
-      fail-fast: false
     services:
       postgres:
         image: pgvector/pgvector:pg17

@@ -192,7 +172,7 @@ jobs:
       - name: Cognee Setup
         uses: ./.github/actions/cognee_setup
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: ${{ inputs.python-version }}
           extra-dependencies: "postgres"

       - name: Setup Neo4j with GDS

@@ -225,4 +205,4 @@ jobs:
           DB_PORT: 5432
           DB_USERNAME: cognee
           DB_PASSWORD: cognee
-        run: uv run pytest cognee/tests/test_search_db.py -v --log-level=INFO
+        run: uv run python ./cognee/tests/test_search_db.py
.github/workflows/vector_db_tests.yml (vendored) — 49 changed lines

@@ -103,55 +103,6 @@ jobs:
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: uv run python ./cognee/tests/test_pgvector.py

-  run-pgvector-multi-user-tests:
-    name: PGVector Multi-User Tests
-    runs-on: ubuntu-22.04
-    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'postgres') }}
-    services:
-      postgres:
-        image: pgvector/pgvector:pg17
-        env:
-          POSTGRES_USER: cognee
-          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
-          POSTGRES_DB: cognee_db
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          - 5432:5432
-    steps:
-      - name: Check out
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Cognee Setup
-        uses: ./.github/actions/cognee_setup
-        with:
-          python-version: ${{ inputs.python-version }}
-          extra-dependencies: "postgres"
-
-      - name: Run PGVector Permissions Tests
-        env:
-          ENV: 'dev'
-          ENABLE_BACKEND_ACCESS_CONTROL: 'true'
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
-          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-          VECTOR_DB_URL: 127.0.0.1
-          VECTOR_DB_PORT: 5432
-          VECTOR_DB_USERNAME: cognee
-          VECTOR_DB_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
-          VECTOR_DATASET_DATABASE_HANDLER: pgvector
-        run: uv run python ./cognee/tests/test_permissions.py
-
   run-lancedb-tests:
     name: LanceDB Tests
     runs-on: ubuntu-22.04
.gitignore (vendored) — 2 changed lines

@@ -147,8 +147,6 @@ venv/
 ENV/
 env.bak/
 venv.bak/
-mise.toml
-deployment/helm/values-local.yml

 # Spyder project settings
 .spyderproject
@@ -7,7 +7,6 @@ repos:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-yaml
-        exclude: ^deployment/helm/templates/
       - id: check-added-large-files
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
@@ -128,3 +128,5 @@ MCP server and Frontend:
## CI Mirrors Local Commands

Our GitHub Actions run the same ruff checks and pytest suites shown above (`.github/workflows/basic_tests.yml` and related workflows). Use the commands in this document locally to minimize CI surprises.
CLAUDE.md — 590 changed lines (file removed)

@@ -1,590 +0,0 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Cognee is an open-source AI memory platform that transforms raw data into persistent knowledge graphs for AI agents. It replaces traditional RAG (Retrieval-Augmented Generation) with an ECL (Extract, Cognify, Load) pipeline combining vector search, graph databases, and LLM-powered entity extraction.

**Requirements**: Python 3.9 - 3.12

## Development Commands

### Setup
```bash
# Create virtual environment (recommended: uv)
uv venv && source .venv/bin/activate

# Install with pip, poetry, or uv
uv pip install -e .

# Install with dev dependencies
uv pip install -e ".[dev]"

# Install with specific extras
uv pip install -e ".[postgres,neo4j,docs,chromadb]"

# Set up pre-commit hooks
pre-commit install
```

### Available Installation Extras
- **postgres** / **postgres-binary** - PostgreSQL + PGVector support
- **neo4j** - Neo4j graph database support
- **neptune** - AWS Neptune support
- **chromadb** - ChromaDB vector database
- **docs** - Document processing (unstructured library)
- **scraping** - Web scraping (Tavily, BeautifulSoup, Playwright)
- **langchain** - LangChain integration
- **llama-index** - LlamaIndex integration
- **anthropic** - Anthropic Claude models
- **gemini** - Google Gemini models
- **ollama** - Ollama local models
- **mistral** - Mistral AI models
- **groq** - Groq API support
- **llama-cpp** - Llama.cpp local inference
- **huggingface** - HuggingFace transformers
- **aws** - S3 storage backend
- **redis** - Redis caching
- **graphiti** - Graphiti-core integration
- **baml** - BAML structured output
- **dlt** - Data load tool (dlt) integration
- **docling** - Docling document processing
- **codegraph** - Code graph extraction
- **evals** - Evaluation tools
- **deepeval** - DeepEval testing framework
- **posthog** - PostHog analytics
- **monitoring** - Sentry + Langfuse observability
- **distributed** - Modal distributed execution
- **dev** - All development tools (pytest, mypy, ruff, etc.)
- **debug** - Debugpy for debugging

### Testing
```bash
# Run all tests
pytest

# Run with coverage
pytest --cov=cognee --cov-report=html

# Run specific test file
pytest cognee/tests/test_custom_model.py

# Run specific test function
pytest cognee/tests/test_custom_model.py::test_function_name

# Run async tests
pytest -v cognee/tests/integration/

# Run unit tests only
pytest cognee/tests/unit/

# Run integration tests only
pytest cognee/tests/integration/
```

### Code Quality
```bash
# Run ruff linter
ruff check .

# Run ruff formatter
ruff format .

# Run both linting and formatting (pre-commit)
pre-commit run --all-files

# Type checking with mypy
mypy cognee/

# Run pylint
pylint cognee/
```

### Running Cognee
```bash
# Using Python SDK
python examples/python/simple_example.py

# Using CLI
cognee-cli add "Your text here"
cognee-cli cognify
cognee-cli search "Your query"
cognee-cli delete --all

# Launch full stack with UI
cognee-cli -ui
```

## Architecture Overview

### Core Workflow: add → cognify → search/memify

1. **add()** - Ingest data (files, URLs, text) into datasets
2. **cognify()** - Extract entities/relationships and build knowledge graph
3. **search()** - Query knowledge using various retrieval strategies
4. **memify()** - Enrich graph with additional context and rules
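Put together, a minimal end-to-end sketch of that workflow (assuming a configured `LLM_API_KEY`; the dataset name is illustrative):

```python
import asyncio

import cognee

async def main():
    # 1. add: ingest raw text into a dataset
    await cognee.add("Cognee builds AI memory from raw data.", dataset_name="demo")
    # 2. cognify: extract entities/relationships into the knowledge graph
    await cognee.cognify(datasets=["demo"])
    # 3. search: query with the default GRAPH_COMPLETION strategy
    results = await cognee.search(query_text="What does Cognee build?")
    print(results)

asyncio.run(main())
```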
### Key Architectural Patterns

#### 1. Pipeline-Based Processing
All data flows through task-based pipelines (`cognee/modules/pipelines/`). Tasks are composable units that can run sequentially or in parallel. Example pipeline tasks: `classify_documents`, `extract_graph_from_data`, `add_data_points`.

#### 2. Interface-Based Database Adapters
Multiple backends are supported through adapter interfaces:
- **Graph**: Kuzu (default), Neo4j, Neptune via `GraphDBInterface`
- **Vector**: LanceDB (default), ChromaDB, PGVector via `VectorDBInterface`
- **Relational**: SQLite (default), PostgreSQL

Key files:
- `cognee/infrastructure/databases/graph/graph_db_interface.py`
- `cognee/infrastructure/databases/vector/vector_db_interface.py`

#### 3. Multi-Tenant Access Control
User → Dataset → Data hierarchy with permission-based filtering. Enable with `ENABLE_BACKEND_ACCESS_CONTROL=True`. Each user+dataset combination can have isolated graph/vector databases (when using supported backends: Kuzu, LanceDB, SQLite, Postgres).

### Layer Structure

```
API Layer (cognee/api/v1/)
    ↓
Main Functions (add, cognify, search, memify)
    ↓
Pipeline Orchestrator (cognee/modules/pipelines/)
    ↓
Task Execution Layer (cognee/tasks/)
    ↓
Domain Modules (graph, retrieval, ingestion, etc.)
    ↓
Infrastructure Adapters (LLM, databases)
    ↓
External Services (OpenAI, Kuzu, LanceDB, etc.)
```

### Critical Data Flow Paths

#### ADD: Data Ingestion
`add()` → `resolve_data_directories` → `ingest_data` → `save_data_item_to_storage` → Create Dataset + Data records in relational DB

Key files: `cognee/api/v1/add/add.py`, `cognee/tasks/ingestion/ingest_data.py`

#### COGNIFY: Knowledge Graph Construction
`cognify()` → `classify_documents` → `extract_chunks_from_documents` → `extract_graph_from_data` (LLM extracts entities/relationships using Instructor) → `summarize_text` → `add_data_points` (store in graph + vector DBs)

Key files:
- `cognee/api/v1/cognify/cognify.py`
- `cognee/tasks/graph/extract_graph_from_data.py`
- `cognee/tasks/storage/add_data_points.py`

#### SEARCH: Retrieval
`search(query_text, query_type)` → route to retriever type → filter by permissions → return results

Available search types (from `cognee/modules/search/types/SearchType.py`):
- **GRAPH_COMPLETION** (default) - Graph traversal + LLM completion
- **GRAPH_SUMMARY_COMPLETION** - Uses pre-computed summaries with graph context
- **GRAPH_COMPLETION_COT** - Chain-of-thought reasoning over graph
- **GRAPH_COMPLETION_CONTEXT_EXTENSION** - Extended context graph retrieval
- **TRIPLET_COMPLETION** - Triplet-based (subject-predicate-object) search
- **RAG_COMPLETION** - Traditional RAG with chunks
- **CHUNKS** - Vector similarity search over chunks
- **CHUNKS_LEXICAL** - Lexical (keyword) search over chunks
- **SUMMARIES** - Search pre-computed document summaries
- **CYPHER** - Direct Cypher query execution (requires `ALLOW_CYPHER_QUERY=True`)
- **NATURAL_LANGUAGE** - Natural language to structured query
- **TEMPORAL** - Time-aware graph search
- **FEELING_LUCKY** - Automatic search type selection
- **FEEDBACK** - User feedback-based refinement
- **CODING_RULES** - Code-specific search rules

Key files:
- `cognee/api/v1/search/search.py`
- `cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py`
- `cognee/modules/search/types/SearchType.py`
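For illustration, selecting one of these strategies explicitly — a sketch using the exported `search` function and the enum path listed above:

```python
from cognee import search
from cognee.modules.search.types import SearchType

async def ask(question: str):
    # Sketch: request chain-of-thought graph retrieval instead of the default.
    return await search(
        query_text=question,
        query_type=SearchType.GRAPH_COMPLETION_COT,
    )
```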
### Core Data Models

#### Engine Models (`cognee/infrastructure/engine/models/`)
- **DataPoint** - Base class for all graph nodes (versioned, with metadata)
- **Edge** - Graph relationships (source, target, relationship type)
- **Triplet** - (Subject, Predicate, Object) representation

#### Graph Models (`cognee/shared/data_models.py`)
- **KnowledgeGraph** - Container for nodes and edges
- **Node** - Entity (id, name, type, description)
- **Edge** - Relationship (source_node_id, target_node_id, relationship_name)
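As a sketch built only from the field names listed above (illustrative values):

```python
from cognee.shared.data_models import Edge, KnowledgeGraph, Node

graph = KnowledgeGraph(
    nodes=[
        Node(id="n1", name="Cognee", type="Tool", description="AI memory engine"),
        Node(id="n2", name="Kuzu", type="Database", description="Embedded graph database"),
    ],
    edges=[
        Edge(source_node_id="n1", target_node_id="n2", relationship_name="stores_graph_in"),
    ],
)
```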
### Key Infrastructure Components

#### LLM Gateway (`cognee/infrastructure/llm/LLMGateway.py`)
Unified interface for multiple LLM providers: OpenAI, Anthropic, Gemini, Ollama, Mistral, Bedrock. Uses Instructor for structured output extraction.

#### Embedding Engines
Factory pattern for embeddings: `cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py`

#### Document Loaders
Support for PDF, DOCX, CSV, images, audio, code files in `cognee/infrastructure/files/`

## Important Configuration

### Environment Setup
Copy `.env.template` to `.env` and configure:

```bash
# Minimal setup (defaults to OpenAI + local file-based databases)
LLM_API_KEY="your_openai_api_key"
LLM_MODEL="openai/gpt-4o-mini"  # Default model
```

**Important**: If you configure only LLM or only embeddings, the other defaults to OpenAI. Ensure you have a working OpenAI API key, or configure both to avoid unexpected defaults.

Default databases (no extra setup needed):
- **Relational**: SQLite (metadata and state storage)
- **Vector**: LanceDB (embeddings for semantic search)
- **Graph**: Kuzu (knowledge graph and relationships)

All are stored in `.venv` by default. Override with `DATA_ROOT_DIRECTORY` and `SYSTEM_ROOT_DIRECTORY`.

### Switching Databases

#### Relational Databases
```bash
# PostgreSQL (requires postgres extra: pip install cognee[postgres])
DB_PROVIDER=postgres
DB_HOST=localhost
DB_PORT=5432
DB_USERNAME=cognee
DB_PASSWORD=cognee
DB_NAME=cognee_db
```

#### Vector Databases
Supported: lancedb (default), pgvector, chromadb, qdrant, weaviate, milvus
```bash
# ChromaDB (requires chromadb extra)
VECTOR_DB_PROVIDER=chromadb

# PGVector (requires postgres extra)
VECTOR_DB_PROVIDER=pgvector
VECTOR_DB_URL=postgresql://cognee:cognee@localhost:5432/cognee_db
```

#### Graph Databases
Supported: kuzu (default), neo4j, neptune, kuzu-remote
```bash
# Neo4j (requires neo4j extra: pip install cognee[neo4j])
GRAPH_DATABASE_PROVIDER=neo4j
GRAPH_DATABASE_URL=bolt://localhost:7687
GRAPH_DATABASE_NAME=neo4j
GRAPH_DATABASE_USERNAME=neo4j
GRAPH_DATABASE_PASSWORD=yourpassword

# Remote Kuzu
GRAPH_DATABASE_PROVIDER=kuzu-remote
GRAPH_DATABASE_URL=http://localhost:8000
GRAPH_DATABASE_USERNAME=your_username
GRAPH_DATABASE_PASSWORD=your_password
```

### LLM Provider Configuration

Supported providers: OpenAI (default), Azure OpenAI, Google Gemini, Anthropic, AWS Bedrock, Ollama, LM Studio, Custom (OpenAI-compatible APIs)

#### OpenAI (Recommended - Minimal Setup)
```bash
LLM_API_KEY="your_openai_api_key"
LLM_MODEL="openai/gpt-4o-mini"  # or gpt-4o, gpt-4-turbo, etc.
LLM_PROVIDER="openai"
```

#### Azure OpenAI
```bash
LLM_PROVIDER="azure"
LLM_MODEL="azure/gpt-4o-mini"
LLM_ENDPOINT="https://YOUR-RESOURCE.openai.azure.com/openai/deployments/gpt-4o-mini"
LLM_API_KEY="your_azure_api_key"
LLM_API_VERSION="2024-12-01-preview"
```

#### Google Gemini (requires gemini extra)
```bash
LLM_PROVIDER="gemini"
LLM_MODEL="gemini/gemini-2.0-flash-exp"
LLM_API_KEY="your_gemini_api_key"
```

#### Anthropic Claude (requires anthropic extra)
```bash
LLM_PROVIDER="anthropic"
LLM_MODEL="claude-3-5-sonnet-20241022"
LLM_API_KEY="your_anthropic_api_key"
```

#### Ollama (Local - requires ollama extra)
```bash
LLM_PROVIDER="ollama"
LLM_MODEL="llama3.1:8b"
LLM_ENDPOINT="http://localhost:11434/v1"
LLM_API_KEY="ollama"
EMBEDDING_PROVIDER="ollama"
EMBEDDING_MODEL="nomic-embed-text:latest"
EMBEDDING_ENDPOINT="http://localhost:11434/api/embed"
HUGGINGFACE_TOKENIZER="nomic-ai/nomic-embed-text-v1.5"
```

#### Custom / OpenRouter / vLLM
```bash
LLM_PROVIDER="custom"
LLM_MODEL="openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
LLM_ENDPOINT="https://openrouter.ai/api/v1"
LLM_API_KEY="your_api_key"
```

#### AWS Bedrock (requires aws extra)
```bash
LLM_PROVIDER="bedrock"
LLM_MODEL="anthropic.claude-3-sonnet-20240229-v1:0"
AWS_REGION="us-east-1"
AWS_ACCESS_KEY_ID="your_access_key"
AWS_SECRET_ACCESS_KEY="your_secret_key"
# Optional for temporary credentials:
# AWS_SESSION_TOKEN="your_session_token"
```

#### LLM Rate Limiting
```bash
LLM_RATE_LIMIT_ENABLED=true
LLM_RATE_LIMIT_REQUESTS=60  # Requests per interval
LLM_RATE_LIMIT_INTERVAL=60  # Interval in seconds
```

#### Instructor Mode (Structured Output)
```bash
# LLM_INSTRUCTOR_MODE controls how structured data is extracted
# Each LLM has its own default (e.g., gpt-4o models use "json_schema_mode")
# Override if needed:
LLM_INSTRUCTOR_MODE="json_schema_mode"  # or "tool_call", "md_json", etc.
```

### Structured Output Framework
```bash
# Use Instructor (default, via litellm)
STRUCTURED_OUTPUT_FRAMEWORK="instructor"

# Or use BAML (requires baml extra: pip install cognee[baml])
STRUCTURED_OUTPUT_FRAMEWORK="baml"
BAML_LLM_PROVIDER=openai
BAML_LLM_MODEL="gpt-4o-mini"
BAML_LLM_API_KEY="your_api_key"
```

### Storage Backend
```bash
# Local filesystem (default)
STORAGE_BACKEND="local"

# S3 (requires aws extra: pip install cognee[aws])
STORAGE_BACKEND="s3"
STORAGE_BUCKET_NAME="your-bucket-name"
AWS_REGION="us-east-1"
AWS_ACCESS_KEY_ID="your_access_key"
AWS_SECRET_ACCESS_KEY="your_secret_key"
DATA_ROOT_DIRECTORY="s3://your-bucket/cognee/data"
SYSTEM_ROOT_DIRECTORY="s3://your-bucket/cognee/system"
```

## Extension Points

### Adding New Functionality

1. **New Task Type**: Create task function in `cognee/tasks/`, return Task object, register in pipeline
2. **New Database Backend**: Implement `GraphDBInterface` or `VectorDBInterface` in `cognee/infrastructure/databases/`
3. **New LLM Provider**: Add configuration in LLM config (uses litellm)
4. **New Document Processor**: Extend loaders in `cognee/modules/data/processing/`
5. **New Search Type**: Add to `SearchType` enum and implement retriever in `cognee/modules/retrieval/`
6. **Custom Graph Models**: Define Pydantic models extending `DataPoint` in your code (see the sketch below)
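A minimal sketch of point 6, assuming the `DataPoint` base class is importable from `cognee.infrastructure.engine` and that its `metadata`/`index_fields` convention controls embedding (both assumptions, not confirmed here):

```python
from cognee.infrastructure.engine import DataPoint

class Person(DataPoint):
    # Fields become properties on the graph node.
    name: str
    role: str
    # Assumed convention: index_fields marks which fields get embedded
    # for vector search.
    metadata: dict = {"index_fields": ["name"]}
```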
### Working with Ontologies
Cognee supports ontology-based entity extraction to ground knowledge graphs in standardized semantic frameworks (e.g., OWL ontologies).

Configuration:
```bash
ONTOLOGY_RESOLVER=rdflib  # Default: uses rdflib and OWL files
MATCHING_STRATEGY=fuzzy  # Default: fuzzy matching with 80% similarity
ONTOLOGY_FILE_PATH=/path/to/your/ontology.owl  # Full path to ontology file
```

Implementation: `cognee/modules/ontology/`

## Branching Strategy

**IMPORTANT**: Always branch from `dev`, not `main`. The `dev` branch is the active development branch.

```bash
git checkout dev
git pull origin dev
git checkout -b feature/your-feature-name
```

## Code Style

- **Formatter**: Ruff (configured in `pyproject.toml`)
- **Line length**: 100 characters
- **String quotes**: Use double quotes `"` not single quotes `'` (enforced by ruff-format)
- **Pre-commit hooks**: Run ruff linting and formatting automatically
- **Type hints**: Encouraged (mypy checks enabled)
- **Important**: Always run `pre-commit run --all-files` before committing to catch formatting issues

## Testing Strategy

Tests are organized in `cognee/tests/`:
- `unit/` - Unit tests for individual modules
- `integration/` - Full pipeline integration tests
- `cli_tests/` - CLI command tests
- `tasks/` - Task-specific tests

When adding features, add corresponding tests. Integration tests should cover the full add → cognify → search flow.

## API Structure

FastAPI application with versioned routes under `cognee/api/v1/`:
- `/add` - Data ingestion
- `/cognify` - Knowledge graph processing
- `/search` - Query interface
- `/memify` - Graph enrichment
- `/datasets` - Dataset management
- `/users` - Authentication (if `REQUIRE_AUTHENTICATION=True`)
- `/visualize` - Graph visualization server

## Python SDK Entry Points

Main functions exported from `cognee/__init__.py`:
- `add(data, dataset_name)` - Ingest data
- `cognify(datasets)` - Build knowledge graph
- `search(query_text, query_type)` - Query knowledge
- `memify(extraction_tasks, enrichment_tasks)` - Enrich graph
- `delete(data_id)` - Remove data
- `config()` - Configuration management
- `datasets()` - Dataset operations

All functions are async - use `await` or `asyncio.run()`.
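For example, from a synchronous script (a sketch; the dataset name is illustrative):

```python
import asyncio

import cognee

# Each SDK call returns a coroutine; asyncio.run() drives it to completion.
asyncio.run(cognee.add("Some text to remember.", dataset_name="notes"))
```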
## Security Considerations

Several security environment variables in `.env`:
- `ACCEPT_LOCAL_FILE_PATH` - Allow local file paths (default: True)
- `ALLOW_HTTP_REQUESTS` - Allow HTTP requests from Cognee (default: True)
- `ALLOW_CYPHER_QUERY` - Allow raw Cypher queries (default: True)
- `REQUIRE_AUTHENTICATION` - Enable API authentication (default: False)
- `ENABLE_BACKEND_ACCESS_CONTROL` - Multi-tenant isolation (default: True)

For production deployments, review and tighten these settings.

## Common Patterns

### Creating a Custom Pipeline Task
```python
from cognee.modules.pipelines.tasks.Task import Task

async def my_custom_task(data):
    # Your logic here
    processed_data = process(data)
    return processed_data

# Use in pipeline
task = Task(my_custom_task)
```

### Accessing Databases Directly
```python
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.vector import get_vector_engine

graph_engine = await get_graph_engine()
vector_engine = await get_vector_engine()
```

### Using LLM Gateway
```python
from cognee.infrastructure.llm.get_llm_client import get_llm_client

llm_client = get_llm_client()
response = await llm_client.acreate_structured_output(
    text_input="Your prompt",
    system_prompt="System instructions",
    response_model=YourPydanticModel,
)
```

## Key Concepts

### Datasets
Datasets are project-level containers that support organization, permissions, and isolated processing workflows. Each user can have multiple datasets with different access permissions.

```python
# Create/use a dataset
await cognee.add(data, dataset_name="my_project")
await cognee.cognify(datasets=["my_project"])
```

### DataPoints
Atomic knowledge units that form the foundation of graph structures. All graph nodes extend the `DataPoint` base class with versioning and metadata support.

### Permissions System
Multi-tenant architecture with users, roles, and Access Control Lists (ACLs):
- Read, write, delete, and share permissions per dataset
- Enable with `ENABLE_BACKEND_ACCESS_CONTROL=True`
- Supports isolated databases per user+dataset (Kuzu, LanceDB, SQLite, Postgres)

### Graph Visualization
Launch visualization server:
```bash
# Via CLI
cognee-cli -ui  # Launches full stack with UI at http://localhost:3000

# Via Python
from cognee.api.v1.visualize import start_visualization_server
await start_visualization_server(port=8080)
```

## Debugging & Troubleshooting

### Debug Configuration
- Set `LITELLM_LOG="DEBUG"` for verbose LLM logs (default: "ERROR")
- Enable debug mode: `ENV="development"` or `ENV="debug"`
- Disable telemetry: `TELEMETRY_DISABLED=1`
- Check logs in structured format (uses structlog)
- Use the `debugpy` optional dependency for debugging: `pip install cognee[debug]`

### Common Issues

**Ollama + OpenAI Embeddings NoDataError**
- Issue: Mixing Ollama with OpenAI embeddings can cause errors
- Solution: Configure both LLM and embeddings to use the same provider, or ensure `HUGGINGFACE_TOKENIZER` is set when using Ollama

**LM Studio Structured Output**
- Issue: LM Studio requires explicit instructor mode
- Solution: Set `LLM_INSTRUCTOR_MODE="json_schema_mode"` (or appropriate mode)

**Default Provider Fallback**
- Issue: Configuring only LLM or only embeddings defaults the other to OpenAI
- Solution: Always configure both LLM and embedding providers, or ensure a valid OpenAI API key

**Permission Denied on Search**
- Behavior: Returns an empty list rather than an error (prevents information leakage)
- Solution: Check dataset permissions and user access rights

**Database Connection Issues**
- Check: Verify database URLs, credentials, and that services are running
- Docker users: Use `DB_HOST=host.docker.internal` for local databases

**Rate Limiting Errors**
- Enable client-side rate limiting: `LLM_RATE_LIMIT_ENABLED=true`
- Adjust limits: `LLM_RATE_LIMIT_REQUESTS` and `LLM_RATE_LIMIT_INTERVAL`

## Resources

- [Documentation](https://docs.cognee.ai/)
- [Discord Community](https://discord.gg/NQPKmU5CCg)
- [GitHub Issues](https://github.com/topoteretes/cognee/issues)
- [Example Notebooks](examples/python/)
- [Research Paper](https://arxiv.org/abs/2505.24478) - Optimizing knowledge graphs for LLM reasoning
@@ -62,11 +62,6 @@ Looking for a place to start? Try filtering for [good first issues](https://gith

 ## 2. 🛠️ Development Setup

-### Required tools
-* [Python](https://www.python.org/downloads/)
-* [uv](https://docs.astral.sh/uv/getting-started/installation/)
-* pre-commit: `uv run pip install pre-commit && pre-commit install`
-
 ### Fork and Clone

 1. Fork the [**cognee**](https://github.com/topoteretes/cognee) repository

@@ -98,26 +93,8 @@ git checkout -b feature/your-feature-name
4. **Commits**: Write clear commit messages

### Running Tests

Rename `.env.example` into `.env` and provide your OPENAI_API_KEY as LLM_API_KEY

```shell
uv run python cognee/tests/test_library.py
```

### Running Simple Example

Rename `.env.example` into `.env` and provide your OPENAI_API_KEY as LLM_API_KEY

Make sure to run ```shell uv sync ``` in the root cloned folder or set up a virtual environment to run cognee

```shell
python examples/python/simple_example.py
```
or

```shell
uv run python examples/python/simple_example.py
python cognee/cognee/tests/test_library.py
```

### Running Simple Example

@@ -137,7 +114,8 @@ uv run python cognee/cognee/examples/python/simple_example.py

 ## 4. 📤 Submitting Changes

-1. Make sure that `pre-commit` and hooks are installed. See `Required tools` section for more information. Try executing `pre-commit run` if you are not sure.
+1. Install ruff on your system
 2. Run ```ruff format .``` and ``` ruff check ``` and fix the issues
 3. Push your changes:
 ```shell
 git add .
@@ -32,14 +32,18 @@ COPY README.md pyproject.toml uv.lock entrypoint.sh ./

 # Install the project's dependencies using the lockfile and settings
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --extra chromadb --frozen --no-install-project --no-dev --no-editable
+    uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable

+# Copy Alembic configuration
+COPY alembic.ini /app/alembic.ini
+COPY alembic/ /app/alembic
+
 # Then, add the rest of the project source code and install it
 # Installing separately from its dependencies allows optimal layer caching
 COPY ./cognee /app/cognee
 COPY ./distributed /app/distributed
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --extra chromadb --frozen --no-dev --no-editable
+    uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable

 FROM python:3.12-slim-bookworm
README.md — 12 changed lines

@@ -66,13 +66,10 @@ Use your data to build personalized and dynamic memory for AI Agents. Cognee let
 ## About Cognee

-Cognee is an open-source tool and platform that transforms your raw data into persistent and dynamic AI memory for Agents. It combines vector search with graph databases to make your documents both searchable by meaning and connected by relationships.
+Cognee offers default memory creation and search which we describe bellow. But with Cognee you can build your own!

 You can use Cognee in two ways:

 1. [Self-host Cognee Open Source](https://docs.cognee.ai/getting-started/installation), which stores all data locally by default.
 2. [Connect to Cognee Cloud](https://platform.cognee.ai/), and get the same OSS stack on managed infrastructure for easier development and productionization.

-### Cognee Open Source (self-hosted):
+### Cognee Open Source:

 - Interconnects any type of data — including past conversations, files, images, and audio transcriptions
 - Replaces traditional RAG systems with a unified memory layer built on graphs and vectors

@@ -80,11 +77,6 @@ You can use Cognee in two ways:
 - Provides Pythonic data pipelines for ingestion from 30+ data sources
 - Offers high customizability through user-defined tasks, modular pipelines, and built-in search endpoints

-### Cognee Cloud (managed):
-- Hosted web UI dashboard
-- Automatic version updates
-- Resource usage analytics
-- GDPR compliant, enterprise-grade security

 ## Basic Usage & Feature Guide
alembic/README (new file) — 1 changed line

@@ -0,0 +1 @@
+Generic single-database configuration with an async dbapi.
cognee-frontend/package-lock.json (generated) — 3775 changed lines. File diff suppressed because it is too large.
@@ -9,15 +9,14 @@
     "lint": "next lint"
   },
   "dependencies": {
-    "@auth0/nextjs-auth0": "^4.14.0",
+    "@auth0/nextjs-auth0": "^4.13.1",
     "classnames": "^2.5.1",
     "culori": "^4.0.1",
     "d3-force-3d": "^3.0.6",
-    "next": "^16.1.1",
-    "react": "^19.2.3",
-    "react-dom": "^19.2.3",
+    "next": "16.1.1",
+    "react": "^19.2.0",
+    "react-dom": "^19.2.0",
     "react-force-graph-2d": "^1.27.1",
     "react-markdown": "^10.1.0",
     "uuid": "^9.0.1"
   },
   "devDependencies": {
@@ -15,8 +15,6 @@ import AddDataToCognee from "./AddDataToCognee";
 import NotebooksAccordion from "./NotebooksAccordion";
 import CogneeInstancesAccordion from "./CogneeInstancesAccordion";
 import InstanceDatasetsAccordion from "./InstanceDatasetsAccordion";
-import cloudFetch from "@/modules/instances/cloudFetch";
-import localFetch from "@/modules/instances/localFetch";

 interface DashboardProps {
   user?: {

@@ -28,17 +26,6 @@
   accessToken: string;
 }

-const cogneeInstances = {
-  cloudCognee: {
-    name: "CloudCognee",
-    fetch: cloudFetch,
-  },
-  localCognee: {
-    name: "LocalCognee",
-    fetch: localFetch,
-  }
-};
-
 export default function Dashboard({ accessToken }: DashboardProps) {
   fetch.setAccessToken(accessToken);
   const { user } = useAuthenticatedUser();

@@ -51,7 +38,7 @@
     updateNotebook,
     saveNotebook,
     removeNotebook,
-  } = useNotebooks(cogneeInstances.localCognee);
+  } = useNotebooks();

   useEffect(() => {
     if (!notebooks.length) {
@@ -3,7 +3,6 @@ import { useCallback, useEffect } from "react";

 import { fetch, isCloudEnvironment, useBoolean } from "@/utils";
 import { checkCloudConnection } from "@/modules/cloud";
-import { setApiKey } from "@/modules/instances/cloudFetch";
 import { CaretIcon, CloseIcon, CloudIcon, LocalCogneeIcon } from "@/ui/Icons";
 import { CTAButton, GhostButton, IconButton, Input, Modal } from "@/ui/elements";
@@ -25,7 +24,6 @@ export default function InstanceDatasetsAccordion({ onDatasetsChange }: Instance
   const checkConnectionToCloudCognee = useCallback((apiKey?: string) => {
     if (apiKey) {
       fetch.setApiKey(apiKey);
-      setApiKey(apiKey);
     }
     return checkCloudConnection()
       .then(setCloudCogneeConnected)
@@ -95,7 +95,6 @@ function useDatasets(useCloud = false) {
      })
      .catch((error) => {
        console.error('Error fetching datasets:', error);
        throw error;
      });
  }, [useCloud]);
(deleted file — 59 lines)

@@ -1,59 +0,0 @@
import handleServerErrors from "@/utils/handleServerErrors";

// let numberOfRetries = 0;

const cloudApiUrl = process.env.NEXT_PUBLIC_CLOUD_API_URL || "http://localhost:8001";

let apiKey: string | null = process.env.NEXT_PUBLIC_COGWIT_API_KEY || null;

export function setApiKey(newApiKey: string) {
  apiKey = newApiKey;
};

export default async function cloudFetch(url: URL | RequestInfo, options: RequestInit = {}): Promise<Response> {
  // function retry(lastError: Response) {
  //   if (numberOfRetries >= 1) {
  //     return Promise.reject(lastError);
  //   }

  //   numberOfRetries += 1;

  //   return global.fetch("/auth/token")
  //     .then(() => {
  //       return fetch(url, options);
  //     });
  // }

  const authHeaders = {
    "Authorization": `X-Api-Key ${apiKey}`,
  };

  return global.fetch(
    cloudApiUrl + "/api" + (typeof url === "string" ? url : url.toString()).replace("/v1", ""),
    {
      ...options,
      headers: {
        ...options.headers,
        ...authHeaders,
      } as HeadersInit,
      credentials: "include",
    },
  )
    .then((response) => handleServerErrors(response, null, true))
    .catch((error) => {
      if (error.message === "NEXT_REDIRECT") {
        throw error;
      }

      if (error.detail === undefined) {
        return Promise.reject(
          new Error("No connection to the server.")
        );
      }

      return Promise.reject(error);
    });
    // .finally(() => {
    //   numberOfRetries = 0;
    // });
}
@ -1,27 +0,0 @@
|
|||
import handleServerErrors from "@/utils/handleServerErrors";
|
||||
|
||||
const localApiUrl = process.env.NEXT_PUBLIC_LOCAL_API_URL || "http://localhost:8000";
|
||||
|
||||
export default async function localFetch(url: URL | RequestInfo, options: RequestInit = {}): Promise<Response> {
|
||||
return global.fetch(
|
||||
localApiUrl + "/api" + (typeof url === "string" ? url : url.toString()),
|
||||
{
|
||||
...options,
|
||||
credentials: "include",
|
||||
},
|
||||
)
|
||||
.then((response) => handleServerErrors(response, null, false))
|
||||
.catch((error) => {
|
||||
if (error.message === "NEXT_REDIRECT") {
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (error.detail === undefined) {
|
||||
return Promise.reject(
|
||||
new Error("No connection to the server.")
|
||||
);
|
||||
}
|
||||
|
||||
return Promise.reject(error);
|
||||
});
|
||||
}
|
||||
|
|
(deleted file — 4 lines)

@@ -1,4 +0,0 @@
export interface CogneeInstance {
  name: string;
  fetch: typeof global.fetch;
}
(deleted file — 13 lines)

@@ -1,13 +0,0 @@
import { CogneeInstance } from "@/modules/instances/types";

export default function createNotebook(notebookName: string, instance: CogneeInstance) {
  return instance.fetch("/v1/notebooks/", {
    body: JSON.stringify({ name: notebookName }),
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
  }).then((response: Response) =>
    response.ok ? response.json() : Promise.reject(response)
  );
}
(deleted file — 7 lines)

@@ -1,7 +0,0 @@
import { CogneeInstance } from "@/modules/instances/types";

export default function deleteNotebook(notebookId: string, instance: CogneeInstance) {
  return instance.fetch(`/v1/notebooks/${notebookId}`, {
    method: "DELETE",
  });
}
(deleted file — 12 lines)

@@ -1,12 +0,0 @@
import { CogneeInstance } from "@/modules/instances/types";

export default function getNotebooks(instance: CogneeInstance) {
  return instance.fetch("/v1/notebooks/", {
    method: "GET",
    headers: {
      "Content-Type": "application/json",
    },
  }).then((response: Response) =>
    response.ok ? response.json() : Promise.reject(response)
  );
}
@@ -1,14 +0,0 @@
-import { Cell } from "@/ui/elements/Notebook/types";
-import { CogneeInstance } from "@/modules/instances/types";
-
-export default function runNotebookCell(notebookId: string, cell: Cell, instance: CogneeInstance) {
-  return instance.fetch(`/v1/notebooks/${notebookId}/${cell.id}/run`, {
-    body: JSON.stringify({
-      content: cell.content,
-    }),
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
-    },
-  }).then((response: Response) => response.json());
-}
@@ -1,13 +0,0 @@
-import { CogneeInstance } from "@/modules/instances/types";
-
-export default function saveNotebook(notebookId: string, notebookData: object, instance: CogneeInstance) {
-  return instance.fetch(`/v1/notebooks/${notebookId}`, {
-    body: JSON.stringify(notebookData),
-    method: "PUT",
-    headers: {
-      "Content-Type": "application/json",
-    },
-  }).then((response: Response) =>
-    response.ok ? response.json() : Promise.reject(response)
-  );
-}
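All five per-endpoint helpers above are deleted in favor of calling a shared fetch utility directly from the notebooks hook, as the hunks below show. A minimal sketch of the replacement pattern, assuming the `fetch` and `isCloudEnvironment` exports from `@/utils` that the next hunk imports:

    // Sketch only: the second argument of the shared fetch selects cloud vs. local.
    import { fetch, isCloudEnvironment } from "@/utils";

    function createNotebook(notebookName: string) {
      return fetch("/v1/notebooks", {
        body: JSON.stringify({ name: notebookName }),
        method: "POST",
        headers: { "Content-Type": "application/json" },
      }, isCloudEnvironment())
        .then((response) => response.json());
    }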
@@ -1,18 +1,20 @@
 import { useCallback, useState } from "react";
+import { fetch, isCloudEnvironment } from "@/utils";
 import { Cell, Notebook } from "@/ui/elements/Notebook/types";
-import { CogneeInstance } from "@/modules/instances/types";
-import createNotebook from "./createNotebook";
-import deleteNotebook from "./deleteNotebook";
-import getNotebooks from "./getNotebooks";
-import runNotebookCell from "./runNotebookCell";
-import { default as persistNotebook } from "./saveNotebook";

-function useNotebooks(instance: CogneeInstance) {
+function useNotebooks() {
   const [notebooks, setNotebooks] = useState<Notebook[]>([]);

   const addNotebook = useCallback((notebookName: string) => {
-    return createNotebook(notebookName, instance)
-      .then((notebook: Notebook) => {
+    return fetch("/v1/notebooks", {
+      body: JSON.stringify({ name: notebookName }),
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+    }, isCloudEnvironment())
+      .then((response) => response.json())
+      .then((notebook) => {
         setNotebooks((notebooks) => [
           ...notebooks,
           notebook,
@@ -20,29 +22,36 @@ function useNotebooks(instance: CogneeInstance) {

         return notebook;
       });
-  }, [instance]);
+  }, []);

   const removeNotebook = useCallback((notebookId: string) => {
-    return deleteNotebook(notebookId, instance)
+    return fetch(`/v1/notebooks/${notebookId}`, {
+      method: "DELETE",
+    }, isCloudEnvironment())
       .then(() => {
         setNotebooks((notebooks) =>
           notebooks.filter((notebook) => notebook.id !== notebookId)
         );
       });
-  }, [instance]);
+  }, []);

   const fetchNotebooks = useCallback(() => {
-    return getNotebooks(instance)
+    return fetch("/v1/notebooks", {
+      headers: {
+        "Content-Type": "application/json",
+      },
+    }, isCloudEnvironment())
+      .then((response) => response.json())
       .then((notebooks) => {
         setNotebooks(notebooks);

         return notebooks;
       })
       .catch((error) => {
-        console.error("Error fetching notebooks:", error.detail);
+        console.error("Error fetching notebooks:", error);
         throw error
       });
-  }, [instance]);
+  }, []);

   const updateNotebook = useCallback((updatedNotebook: Notebook) => {
     setNotebooks((existingNotebooks) =>
@@ -55,13 +64,20 @@ function useNotebooks(instance: CogneeInstance) {
   }, []);

   const saveNotebook = useCallback((notebook: Notebook) => {
-    return persistNotebook(notebook.id, {
-      name: notebook.name,
-      cells: notebook.cells,
-    }, instance);
-  }, [instance]);
+    return fetch(`/v1/notebooks/${notebook.id}`, {
+      body: JSON.stringify({
+        name: notebook.name,
+        cells: notebook.cells,
+      }),
+      method: "PUT",
+      headers: {
+        "Content-Type": "application/json",
+      },
+    }, isCloudEnvironment())
+      .then((response) => response.json())
+  }, []);

-  const runCell = useCallback((notebook: Notebook, cell: Cell) => {
+  const runCell = useCallback((notebook: Notebook, cell: Cell, cogneeInstance: string) => {
     setNotebooks((existingNotebooks) =>
       existingNotebooks.map((existingNotebook) =>
         existingNotebook.id === notebook.id ? {
@@ -73,11 +89,20 @@ function useNotebooks(instance: CogneeInstance) {
             error: undefined,
           } : existingCell
         ),
-      } : existingNotebook
+      } : notebook
       )
     );

-    return runNotebookCell(notebook.id, cell, instance)
+    return fetch(`/v1/notebooks/${notebook.id}/${cell.id}/run`, {
+      body: JSON.stringify({
+        content: cell.content,
+      }),
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+    }, cogneeInstance === "cloud")
+      .then((response) => response.json())
       .then((response) => {
         setNotebooks((existingNotebooks) =>
           existingNotebooks.map((existingNotebook) =>
@@ -90,11 +115,11 @@ function useNotebooks(instance: CogneeInstance) {
             error: response.error,
           } : existingCell
         ),
-      } : existingNotebook
+      } : notebook
       )
     );
       });
-  }, [instance]);
+  }, []);

   return {
     notebooks,
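For orientation, a sketch of how the reworked hook is consumed after this change; the component is hypothetical, but the signatures match the hunks above (`useNotebooks` takes no instance argument, and `runCell` now receives the target instance as a string):

    // Hypothetical consumer; per the hunk above, any value other than "cloud"
    // runs the cell against the local backend.
    function NotebooksPanel() {
      const { notebooks, addNotebook, runCell } = useNotebooks();

      const handleRun = () => {
        const notebook = notebooks[0];
        if (notebook?.cells.length) {
          runCell(notebook, notebook.cells[0], "cloud");
        }
      };

      // ...render notebooks and wire handleRun to a button.
    }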
@@ -1,2 +1,3 @@
 export { default as Modal } from "./Modal";
 export { default as useModal } from "./useModal";
@@ -1,76 +0,0 @@
-import { memo } from "react";
-import ReactMarkdown from "react-markdown";
-
-interface MarkdownPreviewProps {
-  content: string;
-  className?: string;
-}
-
-function MarkdownPreview({ content, className = "" }: MarkdownPreviewProps) {
-  return (
-    <div className={`min-h-24 max-h-96 overflow-y-auto p-4 prose prose-sm max-w-none ${className}`}>
-      <ReactMarkdown
-        components={{
-          h1: ({ children }) => <h1 className="text-2xl font-bold mt-4 mb-2">{children}</h1>,
-          h2: ({ children }) => <h2 className="text-xl font-bold mt-3 mb-2">{children}</h2>,
-          h3: ({ children }) => <h3 className="text-lg font-bold mt-3 mb-2">{children}</h3>,
-          h4: ({ children }) => <h4 className="text-base font-bold mt-2 mb-1">{children}</h4>,
-          h5: ({ children }) => <h5 className="text-sm font-bold mt-2 mb-1">{children}</h5>,
-          h6: ({ children }) => <h6 className="text-xs font-bold mt-2 mb-1">{children}</h6>,
-          p: ({ children }) => <p className="mb-2">{children}</p>,
-          ul: ({ children }) => <ul className="list-disc list-inside mb-2 ml-4">{children}</ul>,
-          ol: ({ children }) => <ol className="list-decimal list-inside mb-2 ml-4">{children}</ol>,
-          li: ({ children }) => <li className="mb-1">{children}</li>,
-          blockquote: ({ children }) => (
-            <blockquote className="border-l-4 border-gray-300 pl-4 italic my-2">{children}</blockquote>
-          ),
-          code: ({ className, children, ...props }) => {
-            const isInline = !className;
-            return isInline ? (
-              <code className="bg-gray-100 px-1 py-0.5 rounded text-sm font-mono" {...props}>
-                {children}
-              </code>
-            ) : (
-              <code className="block bg-gray-100 p-2 rounded text-sm font-mono overflow-x-auto" {...props}>
-                {children}
-              </code>
-            );
-          },
-          pre: ({ children }) => (
-            <pre className="bg-gray-100 p-2 rounded text-sm font-mono overflow-x-auto mb-2">
-              {children}
-            </pre>
-          ),
-          a: ({ href, children }) => (
-            <a href={href} className="text-blue-600 hover:underline" target="_blank" rel="noopener noreferrer">
-              {children}
-            </a>
-          ),
-          strong: ({ children }) => <strong className="font-bold">{children}</strong>,
-          em: ({ children }) => <em className="italic">{children}</em>,
-          hr: () => <hr className="my-4 border-gray-300" />,
-          table: ({ children }) => (
-            <div className="overflow-x-auto my-2">
-              <table className="min-w-full border border-gray-300">{children}</table>
-            </div>
-          ),
-          thead: ({ children }) => <thead className="bg-gray-100">{children}</thead>,
-          tbody: ({ children }) => <tbody>{children}</tbody>,
-          tr: ({ children }) => <tr className="border-b border-gray-300">{children}</tr>,
-          th: ({ children }) => (
-            <th className="border border-gray-300 px-4 py-2 text-left font-bold">
-              {children}
-            </th>
-          ),
-          td: ({ children }) => (
-            <td className="border border-gray-300 px-4 py-2">{children}</td>
-          ),
-        }}
-      >
-        {content}
-      </ReactMarkdown>
-    </div>
-  );
-}
-
-export default memo(MarkdownPreview);
@@ -2,17 +2,15 @@

 import { v4 as uuid4 } from "uuid";
 import classNames from "classnames";
-import { Fragment, MouseEvent, MutableRefObject, useCallback, useEffect, useRef, useState, memo } from "react";
+import { Fragment, MouseEvent, RefObject, useCallback, useEffect, useRef, useState } from "react";

 import { useModal } from "@/ui/elements/Modal";
 import { CaretIcon, CloseIcon, PlusIcon } from "@/ui/Icons";
-import PopupMenu from "@/ui/elements/PopupMenu";
-import { IconButton, TextArea, Modal, GhostButton, CTAButton } from "@/ui/elements";
+import { IconButton, PopupMenu, TextArea, Modal, GhostButton, CTAButton } from "@/ui/elements";
 import { GraphControlsAPI } from "@/app/(graph)/GraphControls";
 import GraphVisualization, { GraphVisualizationAPI } from "@/app/(graph)/GraphVisualization";

 import NotebookCellHeader from "./NotebookCellHeader";
-import MarkdownPreview from "./MarkdownPreview";
 import { Cell, Notebook as NotebookType } from "./types";

 interface NotebookProps {
@@ -21,186 +19,7 @@ interface NotebookProps {
   updateNotebook: (updatedNotebook: NotebookType) => void;
 }

-interface NotebookCellProps {
-  cell: Cell;
-  index: number;
-  isOpen: boolean;
-  isMarkdownEditMode: boolean;
-  onToggleOpen: () => void;
-  onToggleMarkdownEdit: () => void;
-  onContentChange: (value: string) => void;
-  onCellRun: (cell: Cell, cogneeInstance: string) => Promise<void>;
-  onCellRename: (cell: Cell) => void;
-  onCellRemove: (cell: Cell) => void;
-  onCellUp: (cell: Cell) => void;
-  onCellDown: (cell: Cell) => void;
-  onCellAdd: (afterCellIndex: number, cellType: "markdown" | "code") => void;
-}
-
-const NotebookCell = memo(function NotebookCell({
-  cell,
-  index,
-  isOpen,
-  isMarkdownEditMode,
-  onToggleOpen,
-  onToggleMarkdownEdit,
-  onContentChange,
-  onCellRun,
-  onCellRename,
-  onCellRemove,
-  onCellUp,
-  onCellDown,
-  onCellAdd,
-}: NotebookCellProps) {
-  return (
-    <Fragment>
-      <div className="flex flex-row rounded-xl border-1 border-gray-100">
-        <div className="flex flex-col flex-1 relative">
-          {cell.type === "code" ? (
-            <>
-              <div className="absolute left-[-1.35rem] top-2.5">
-                <IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={onToggleOpen}>
-                  <CaretIcon className={classNames("transition-transform", isOpen ? "rotate-0" : "rotate-180")} />
-                </IconButton>
-              </div>
-
-              <NotebookCellHeader
-                cell={cell}
-                runCell={onCellRun}
-                renameCell={onCellRename}
-                removeCell={onCellRemove}
-                moveCellUp={onCellUp}
-                moveCellDown={onCellDown}
-                className="rounded-tl-xl rounded-tr-xl"
-              />
-
-              {isOpen && (
-                <>
-                  <TextArea
-                    value={cell.content}
-                    onChange={onContentChange}
-                    isAutoExpanding
-                    name="cellInput"
-                    placeholder="Type your code here..."
-                    className="resize-none min-h-36 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0"
-                  />
-
-                  <div className="flex flex-col bg-gray-100 overflow-x-auto max-w-full">
-                    {cell.result && (
-                      <div className="px-2 py-2">
-                        output: <CellResult content={cell.result} />
-                      </div>
-                    )}
-                    {!!cell.error?.length && (
-                      <div className="px-2 py-2">
-                        error: {cell.error}
-                      </div>
-                    )}
-                  </div>
-                </>
-              )}
-            </>
-          ) : (
-            <>
-              <div className="absolute left-[-1.35rem] top-2.5">
-                <IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={onToggleOpen}>
-                  <CaretIcon className={classNames("transition-transform", isOpen ? "rotate-0" : "rotate-180")} />
-                </IconButton>
-              </div>
-
-              <NotebookCellHeader
-                cell={cell}
-                renameCell={onCellRename}
-                removeCell={onCellRemove}
-                moveCellUp={onCellUp}
-                moveCellDown={onCellDown}
-                className="rounded-tl-xl rounded-tr-xl"
-              />
-
-              {isOpen && (
-                <div className="relative rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 overflow-hidden">
-                  <GhostButton
-                    onClick={onToggleMarkdownEdit}
-                    className="absolute top-2 right-2.5 text-xs leading-[1] !px-2 !py-1 !h-auto"
-                  >
-                    {isMarkdownEditMode ? "Preview" : "Edit"}
-                  </GhostButton>
-                  {isMarkdownEditMode ? (
-                    <TextArea
-                      value={cell.content}
-                      onChange={onContentChange}
-                      isAutoExpanding
-                      name="markdownInput"
-                      placeholder="Type your markdown here..."
-                      className="resize-none min-h-24 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0 !bg-gray-50"
-                    />
-                  ) : (
-                    <MarkdownPreview content={cell.content} className="!bg-gray-50" />
-                  )}
-                </div>
-              )}
-            </>
-          )}
-        </div>
-      </div>
-      <div className="ml-[-1.35rem]">
-        <PopupMenu
-          openToRight={true}
-          triggerElement={<PlusIcon />}
-          triggerClassName="p-[0.25rem] m-[-0.25rem]"
-        >
-          <div className="flex flex-col gap-0.5">
-            <button
-              onClick={() => onCellAdd(index, "markdown")}
-              className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
-            >
-              <span>text</span>
-            </button>
-          </div>
-          <div
-            onClick={() => onCellAdd(index, "code")}
-            className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
-          >
-            <span>code</span>
-          </div>
-        </PopupMenu>
-      </div>
-    </Fragment>
-  );
-});
-
 export default function Notebook({ notebook, updateNotebook, runCell }: NotebookProps) {
-  const [openCells, setOpenCells] = useState(new Set(notebook.cells.map((c: Cell) => c.id)));
-  const [markdownEditMode, setMarkdownEditMode] = useState<Set<string>>(new Set());
-
-  const toggleCellOpen = useCallback((id: string) => {
-    setOpenCells((prev) => {
-      const newState = new Set(prev);
-
-      if (newState.has(id)) {
-        newState.delete(id)
-      } else {
-        newState.add(id);
-      }
-
-      return newState;
-    });
-  }, []);
-
-  const toggleMarkdownEditMode = useCallback((id: string) => {
-    setMarkdownEditMode((prev) => {
-      const newState = new Set(prev);
-
-      if (newState.has(id)) {
-        newState.delete(id);
-      } else {
-        newState.add(id);
-      }
-
-      return newState;
-    });
-  }, []);
-
   useEffect(() => {
     if (notebook.cells.length === 0) {
       const newCell: Cell = {
@@ -215,7 +34,7 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
       });
       toggleCellOpen(newCell.id)
     }
-  }, [notebook, updateNotebook, toggleCellOpen]);
+  }, [notebook, updateNotebook]);

   const handleCellRun = useCallback((cell: Cell, cogneeInstance: string) => {
     return runCell(notebook, cell, cogneeInstance);
@@ -224,7 +43,7 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
   const handleCellAdd = useCallback((afterCellIndex: number, cellType: "markdown" | "code") => {
     const newCell: Cell = {
       id: uuid4(),
-      name: cellType === "markdown" ? "Markdown Cell" : "Code Cell",
+      name: "new cell",
       type: cellType,
       content: "",
     };
@@ -240,7 +59,7 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook

     toggleCellOpen(newCell.id);
     updateNotebook(newNotebook);
-  }, [notebook, updateNotebook, toggleCellOpen]);
+  }, [notebook, updateNotebook]);

   const removeCell = useCallback((cell: Cell, event?: MouseEvent) => {
     event?.preventDefault();
@@ -262,12 +81,14 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
     openCellRemoveConfirmModal(cell);
   }, [openCellRemoveConfirmModal]);

-  const handleCellInputChange = useCallback((cellId: string, value: string) => {
+  const handleCellInputChange = useCallback((notebook: NotebookType, cell: Cell, value: string) => {
+    const newCell = {...cell, content: value };
+
     updateNotebook({
       ...notebook,
-      cells: notebook.cells.map((cell: Cell) => (cell.id === cellId ? {...cell, content: value} : cell)),
+      cells: notebook.cells.map((cell: Cell) => (cell.id === newCell.id ? newCell : cell)),
     });
-  }, [notebook, updateNotebook]);
+  }, [updateNotebook]);

   const handleCellUp = useCallback((cell: Cell) => {
     const index = notebook.cells.indexOf(cell);
@@ -310,28 +131,133 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
     }
   }, [notebook, updateNotebook]);

+  const [openCells, setOpenCells] = useState(new Set(notebook.cells.map((c: Cell) => c.id)));
+
+  const toggleCellOpen = (id: string) => {
+    setOpenCells((prev) => {
+      const newState = new Set(prev);
+
+      if (newState.has(id)) {
+        newState.delete(id)
+      } else {
+        newState.add(id);
+      }
+
+      return newState;
+    });
+  };
+
   return (
     <>
       <div className="bg-white rounded-xl flex flex-col gap-0.5 px-7 py-5 flex-1">
         <div className="mb-5">{notebook.name}</div>

         {notebook.cells.map((cell: Cell, index) => (
-          <NotebookCell
-            key={cell.id}
-            cell={cell}
-            index={index}
-            isOpen={openCells.has(cell.id)}
-            isMarkdownEditMode={markdownEditMode.has(cell.id)}
-            onToggleOpen={() => toggleCellOpen(cell.id)}
-            onToggleMarkdownEdit={() => toggleMarkdownEditMode(cell.id)}
-            onContentChange={(value) => handleCellInputChange(cell.id, value)}
-            onCellRun={handleCellRun}
-            onCellRename={handleCellRename}
-            onCellRemove={handleCellRemove}
-            onCellUp={handleCellUp}
-            onCellDown={handleCellDown}
-            onCellAdd={handleCellAdd}
-          />
+          <Fragment key={cell.id}>
+            <div key={cell.id} className="flex flex-row rounded-xl border-1 border-gray-100">
+              <div className="flex flex-col flex-1 relative">
+                {cell.type === "code" ? (
+                  <>
+                    <div className="absolute left-[-1.35rem] top-2.5">
+                      <IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={toggleCellOpen.bind(null, cell.id)}>
+                        <CaretIcon className={classNames("transition-transform", openCells.has(cell.id) ? "rotate-0" : "rotate-180")} />
+                      </IconButton>
+                    </div>
+
+                    <NotebookCellHeader
+                      cell={cell}
+                      runCell={handleCellRun}
+                      renameCell={handleCellRename}
+                      removeCell={handleCellRemove}
+                      moveCellUp={handleCellUp}
+                      moveCellDown={handleCellDown}
+                      className="rounded-tl-xl rounded-tr-xl"
+                    />
+
+                    {openCells.has(cell.id) && (
+                      <>
+                        <TextArea
+                          value={cell.content}
+                          onChange={handleCellInputChange.bind(null, notebook, cell)}
+                          // onKeyUp={handleCellRunOnEnter}
+                          isAutoExpanding
+                          name="cellInput"
+                          placeholder="Type your code here..."
+                          contentEditable={true}
+                          className="resize-none min-h-36 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0"
+                        />
+
+                        <div className="flex flex-col bg-gray-100 overflow-x-auto max-w-full">
+                          {cell.result && (
+                            <div className="px-2 py-2">
+                              output: <CellResult content={cell.result} />
+                            </div>
+                          )}
+                          {!!cell.error?.length && (
+                            <div className="px-2 py-2">
+                              error: {cell.error}
+                            </div>
+                          )}
+                        </div>
+                      </>
+                    )}
+                  </>
+                ) : (
+                  <>
+                    <div className="absolute left-[-1.35rem] top-2.5">
+                      <IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={toggleCellOpen.bind(null, cell.id)}>
+                        <CaretIcon className={classNames("transition-transform", openCells.has(cell.id) ? "rotate-0" : "rotate-180")} />
+                      </IconButton>
+                    </div>
+
+                    <NotebookCellHeader
+                      cell={cell}
+                      renameCell={handleCellRename}
+                      removeCell={handleCellRemove}
+                      moveCellUp={handleCellUp}
+                      moveCellDown={handleCellDown}
+                      className="rounded-tl-xl rounded-tr-xl"
+                    />
+
+                    {openCells.has(cell.id) && (
+                      <TextArea
+                        value={cell.content}
+                        onChange={handleCellInputChange.bind(null, notebook, cell)}
+                        // onKeyUp={handleCellRunOnEnter}
+                        isAutoExpanding
+                        name="cellInput"
+                        placeholder="Type your text here..."
+                        contentEditable={true}
+                        className="resize-none min-h-24 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0"
+                      />
+                    )}
+                  </>
+                )}
+              </div>
+            </div>
+            <div className="ml-[-1.35rem]">
+              <PopupMenu
+                openToRight={true}
+                triggerElement={<PlusIcon />}
+                triggerClassName="p-[0.25rem] m-[-0.25rem]"
+              >
+                <div className="flex flex-col gap-0.5">
+                  <button
+                    onClick={() => handleCellAdd(index, "markdown")}
+                    className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
+                  >
+                    <span>text</span>
+                  </button>
+                </div>
+                <div
+                  onClick={() => handleCellAdd(index, "code")}
+                  className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
+                >
+                  <span>code</span>
+                </div>
+              </PopupMenu>
+            </div>
+          </Fragment>
         ))}
       </div>
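Worth noting: the hunk above trades the memoized NotebookCell component for inline JSX driven by Function.prototype.bind. A minimal sketch of that binding pattern (the names match the diff; the behavior note is mine, not the diff's):

    // toggleCellOpen.bind(null, cell.id) pre-applies cell.id and yields a
    // zero-argument handler, equivalent to () => toggleCellOpen(cell.id).
    // Both forms create a fresh function on every render, which is why the
    // React.memo wrapper removed above no longer pays for itself here.
    const onClick = toggleCellOpen.bind(null, cell.id);
    onClick(); // same as toggleCellOpen(cell.id)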
@@ -362,10 +288,6 @@ function CellResult({ content }: { content: [] }) {
     getSelectedNode: () => null,
   });

-  if (content.length === 0) {
-    return <span>OK</span>;
-  }
-
   for (const line of content) {
     try {
       if (Array.isArray(line)) {
@@ -376,7 +298,7 @@ function CellResult({ content }: { content: [] }) {
             <span className="text-sm pl-2 mb-4">reasoning graph</span>
             <GraphVisualization
               data={transformInsightsGraphData(line)}
-              ref={graphRef as MutableRefObject<GraphVisualizationAPI>}
+              ref={graphRef as RefObject<GraphVisualizationAPI>}
               graphControls={graphControls}
               className="min-h-80"
             />
@@ -424,7 +346,7 @@ function CellResult({ content }: { content: [] }) {
             <span className="text-sm pl-2 mb-4">reasoning graph (datasets: {datasetName})</span>
             <GraphVisualization
               data={transformToVisualizationData(graph)}
-              ref={graphRef as MutableRefObject<GraphVisualizationAPI>}
+              ref={graphRef as RefObject<GraphVisualizationAPI>}
               graphControls={graphControls}
               className="min-h-80"
             />
@@ -434,7 +356,8 @@ function CellResult({ content }: { content: [] }) {
           }
         }
       }
-      else if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "string") {
+
+      if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "string") {
         const datasets = Array.from(
           // eslint-disable-next-line @typescript-eslint/no-explicit-any
           new Set(Object.values(line["datasets"]).map((dataset: any) => dataset.name))
@@ -446,46 +369,39 @@ function CellResult({ content }: { content: [] }) {
             <span className="block px-2 py-2 whitespace-normal">{line["result"]}</span>
           </div>
         );
-
-        if (line["graphs"]) {
-          Object.entries<{ nodes: []; edges: []; }>(line["graphs"]).forEach(([datasetName, graph]) => {
-            parsedContent.push(
-              <div key={datasetName} className="w-full h-full bg-white">
-                <span className="text-sm pl-2 mb-4">reasoning graph (datasets: {datasetName})</span>
-                <GraphVisualization
-                  data={transformToVisualizationData(graph)}
-                  ref={graphRef as MutableRefObject<GraphVisualizationAPI>}
-                  graphControls={graphControls}
-                  className="min-h-80"
-                />
-              </div>
-            );
-          });
-        }
-      }
-      else if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "object") {
+      }
+
+      if (typeof(line) === "object" && line["graphs"]) {
+        Object.entries<{ nodes: []; edges: []; }>(line["graphs"]).forEach(([datasetName, graph]) => {
+          parsedContent.push(
+            <div key={datasetName} className="w-full h-full bg-white">
+              <span className="text-sm pl-2 mb-4">reasoning graph (datasets: {datasetName})</span>
+              <GraphVisualization
+                data={transformToVisualizationData(graph)}
+                ref={graphRef as RefObject<GraphVisualizationAPI>}
+                graphControls={graphControls}
+                className="min-h-80"
+              />
+            </div>
+          );
+        });
+      }
+
+      if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "object") {
         parsedContent.push(
           <pre className="px-2 w-full h-full bg-white text-sm" key={String(line).slice(0, -10)}>
             {JSON.stringify(line["result"], null, 2)}
           </pre>
         )
       }
       else if (typeof(line) === "object") {
         parsedContent.push(
           <pre className="px-2 w-full h-full bg-white text-sm" key={String(line).slice(0, -10)}>
             {JSON.stringify(line, null, 2)}
           </pre>
         )
       }
-      else if (typeof(line) === "string") {
+      if (typeof(line) === "string") {
         parsedContent.push(
           <pre className="px-2 w-full h-full bg-white text-sm whitespace-normal" key={String(line).slice(0, -10)}>
             {line}
           </pre>
         )
       }
-    } catch {
-      // It is fine if we don't manage to parse the output line, we show it as it is.
+    } catch (error) {
+      console.error(error);
+      parsedContent.push(
+        <pre className="px-2 w-full h-full bg-white text-sm whitespace-normal" key={String(line).slice(0, -10)}>
+          {line}
@@ -499,6 +415,7 @@ function CellResult({ content }: { content: [] }) {
       {item}
     </div>
   ));
+
 };

 function transformToVisualizationData(graph: { nodes: [], edges: [] }) {
@@ -1,12 +1,9 @@
 "use client";

-import { useState } from "react";
-import classNames from "classnames";
-
 import { isCloudEnvironment, useBoolean } from "@/utils";
 import { PlayIcon } from "@/ui/Icons";
-import PopupMenu from "@/ui/elements/PopupMenu";
-import { IconButton } from "@/ui/elements";
+import { PopupMenu, IconButton } from "@/ui/elements";
 import { LoadingIndicator } from "@/ui/App";

 import { Cell } from "./types";
@@ -42,7 +39,7 @@ export default function NotebookCellHeader({
     if (runCell) {
       setIsRunningCell();
       runCell(cell, runInstance)
-        .finally(() => {
+        .then(() => {
           setIsNotRunningCell();
         });
     }
@@ -56,7 +53,7 @@ export default function NotebookCellHeader({
         {isRunningCell ? <LoadingIndicator /> : <IconButton onClick={handleCellRun}><PlayIcon /></IconButton>}
       </>
     )}
-    <span className="ml-4">{cell.type === "markdown" ? "Markdown Cell" : cell.name}</span>
+    <span className="ml-4">{cell.name}</span>
   </div>
   <div className="pr-4 flex flex-row items-center gap-8">
     {runCell && (
@@ -1,12 +1,12 @@
 "use client";

 import classNames from "classnames";
-import { InputHTMLAttributes, useCallback, useEffect, useRef } from "react"
+import { InputHTMLAttributes, useCallback, useEffect, useLayoutEffect, useRef } from "react"

 interface TextAreaProps extends Omit<InputHTMLAttributes<HTMLTextAreaElement>, "onChange"> {
   isAutoExpanding?: boolean; // Set to true to enable auto-expanding text area behavior. Default is false.
-  value?: string;
-  onChange?: (value: string) => void;
+  value: string;
+  onChange: (value: string) => void;
 }

 export default function TextArea({
@@ -19,81 +19,95 @@ export default function TextArea({
   placeholder = "",
   onKeyUp,
   ...props
-}: TextAreaProps) {
-  const textareaRef = useRef<HTMLTextAreaElement>(null);
-  const maxHeightRef = useRef<number | null>(null);
-  const throttleTimeoutRef = useRef<number | null>(null);
-  const lastAdjustTimeRef = useRef<number>(0);
-  const THROTTLE_MS = 250; // 4 calculations per second
+}: TextAreaProps) {
+  const handleTextChange = useCallback((event: Event) => {
+    const fakeTextAreaElement = event.target as HTMLDivElement;
+    const newValue = fakeTextAreaElement.innerText;

-  const adjustHeight = useCallback(() => {
-    if (!isAutoExpanding || !textareaRef.current) return;
-
-    const textarea = textareaRef.current;
-
-    // Cache maxHeight on first calculation
-    if (maxHeightRef.current === null) {
-      const computedStyle = getComputedStyle(textarea);
-      maxHeightRef.current = computedStyle.maxHeight === "none"
-        ? Infinity
-        : parseInt(computedStyle.maxHeight) || Infinity;
-    }
-
-    // Reset height to auto to get the correct scrollHeight
-    textarea.style.height = "auto";
-    // Set height to scrollHeight, but respect max-height
-    const scrollHeight = textarea.scrollHeight;
-    textarea.style.height = `${Math.min(scrollHeight, maxHeightRef.current)}px`;
-    lastAdjustTimeRef.current = Date.now();
-  }, [isAutoExpanding]);
-
-  const handleChange = useCallback((event: React.ChangeEvent<HTMLTextAreaElement>) => {
-    const newValue = event.target.value;
     onChange?.(newValue);
   }, [onChange]);

-    // Throttle height adjustments to avoid blocking typing
-    if (isAutoExpanding) {
-      const now = Date.now();
-      const timeSinceLastAdjust = now - lastAdjustTimeRef.current;
-
-      if (timeSinceLastAdjust >= THROTTLE_MS) {
-        adjustHeight();
-      } else {
-        if (throttleTimeoutRef.current !== null) {
-          clearTimeout(throttleTimeoutRef.current);
-        }
-        throttleTimeoutRef.current = window.setTimeout(() => {
-          adjustHeight();
-          throttleTimeoutRef.current = null;
-        }, THROTTLE_MS - timeSinceLastAdjust);
-      }
-    }
+  const handleKeyUp = useCallback((event: Event) => {
+    if (onKeyUp) {
+      onKeyUp(event as unknown as React.KeyboardEvent<HTMLTextAreaElement>);
     }
-  }, [onChange, isAutoExpanding, adjustHeight]);
+  }, [onKeyUp]);

-  useEffect(() => {
-    if (isAutoExpanding && textareaRef.current) {
-      adjustHeight();
+  const handleTextAreaFocus = (event: React.FocusEvent<HTMLDivElement>) => {
+    if (event.target.innerText.trim() === placeholder) {
+      event.target.innerText = "";
     }
+  };
+  const handleTextAreaBlur = (event: React.FocusEvent<HTMLDivElement>) => {
+    if (value === "") {
+      event.target.innerText = placeholder;
+    }
+  };
+
+  const handleChange = (event: React.ChangeEvent<HTMLTextAreaElement>) => {
+    onChange(event.target.value);
+  };
+
+  const fakeTextAreaRef = useRef<HTMLDivElement>(null);
+
+  useLayoutEffect(() => {
+    const fakeTextAreaElement = fakeTextAreaRef.current;
+
+    if (fakeTextAreaElement && fakeTextAreaElement.innerText.trim() !== "") {
+      fakeTextAreaElement.innerText = placeholder;
+    }
+  }, [placeholder]);
+
+  useLayoutEffect(() => {
+    const fakeTextAreaElement = fakeTextAreaRef.current;
+
+    if (fakeTextAreaElement) {
+      fakeTextAreaElement.addEventListener("input", handleTextChange);
+      fakeTextAreaElement.addEventListener("keyup", handleKeyUp);
+    }
-  }, [value, isAutoExpanding, adjustHeight]);

   useEffect(() => {
     return () => {
-      if (throttleTimeoutRef.current !== null) {
-        clearTimeout(throttleTimeoutRef.current);
+      if (fakeTextAreaElement) {
+        fakeTextAreaElement.removeEventListener("input", handleTextChange);
+        fakeTextAreaElement.removeEventListener("keyup", handleKeyUp);
       }
     };
-  }, []);
+  }, [handleKeyUp, handleTextChange]);

-  return (
+  useEffect(() => {
+    const fakeTextAreaElement = fakeTextAreaRef.current;
+    const textAreaText = fakeTextAreaElement?.innerText;
+
+    if (fakeTextAreaElement && (value === "" || value === "\n")) {
+      fakeTextAreaElement.innerText = placeholder;
+      return;
+    }
+
+    if (fakeTextAreaElement && textAreaText !== value) {
+      fakeTextAreaElement.innerText = value;
+    }
+  }, [placeholder, value]);
+
+  return isAutoExpanding ? (
+    <>
+      <div
+        ref={fakeTextAreaRef}
+        contentEditable="true"
+        role="textbox"
+        aria-multiline="true"
+        className={classNames("block w-full rounded-md bg-white px-4 py-4 text-base text-gray-900 outline-1 -outline-offset-1 outline-gray-300 placeholder:text-gray-400 focus:outline-2 focus:-outline-offset-2 focus:outline-indigo-600", className)}
+        onFocus={handleTextAreaFocus}
+        onBlur={handleTextAreaBlur}
+      />
+    </>
+  ) : (
     <textarea
-      ref={isAutoExpanding ? textareaRef : undefined}
       name={name}
       style={style}
       value={value}
       placeholder={placeholder}
       className={classNames("block w-full rounded-md bg-white px-4 py-4 text-base text-gray-900 outline-1 -outline-offset-1 outline-gray-300 placeholder:text-gray-400 focus:outline-2 focus:-outline-offset-2 focus:outline-indigo-600", className)}
       onChange={handleChange}
       onKeyUp={onKeyUp}
       {...props}
     />
   )
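A short usage sketch of the reworked component, mirroring how Notebook.tsx drives it in the hunks above; when `isAutoExpanding` is set, the contentEditable div branch renders instead of a native textarea:

    // Sketch only: props match the call sites shown earlier in this diff.
    <TextArea
      value={cell.content}
      onChange={(value) => handleCellInputChange(notebook, cell, value)}
      isAutoExpanding
      name="cellInput"
      placeholder="Type your code here..."
    />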
@@ -1,12 +1,8 @@
 import { redirect } from "next/navigation";

-export default function handleServerErrors(
-  response: Response,
-  retry: ((response: Response) => Promise<Response>) | null = null,
-  useCloud: boolean = false,
-): Promise<Response> {
+export default function handleServerErrors(response: Response, retry?: (response: Response) => Promise<Response>, useCloud?: boolean): Promise<Response> {
   return new Promise((resolve, reject) => {
-    if ((response.status === 401 || response.status === 403) && !useCloud) {
+    if (response.status === 401 && !useCloud) {
       if (retry) {
         return retry(response)
           .catch(() => {
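For reference, a sketch of how the fetch wrappers earlier in this diff funnel into this helper; both pass no usable retry callback, so the 401 branch can only redirect. The calls below are illustrative wiring, not new code in this diff:

    // Sketch only: mirrors the call sites shown earlier.
    declare const response: Response; // stand-in for a fetch result
    handleServerErrors(response, undefined, true)   // cloudFetch path (useCloud = true)
      .catch((error) => console.error(error));      // localFetch (deleted above) passed false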
@@ -34,8 +34,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv sync --frozen --no-install-project --no-dev --no-editable

 # Copy Alembic configuration
-COPY cognee/alembic.ini /app/cognee/alembic.ini
-COPY cognee/alembic/ /app/cognee/alembic
+COPY alembic.ini /app/alembic.ini
+COPY alembic/ /app/alembic

 # Then, add the rest of the project source code and install it
 # Installing separately from its dependencies allows optimal layer caching
@@ -56,22 +56,24 @@ if [ -n "$API_URL" ]; then
   echo "Skipping database migrations (API server handles its own database)"
 else
   echo "Direct mode: Using local cognee instance"
+  # Run Alembic migrations with proper error handling.
+  # Note on UserAlreadyExists error handling:
+  # During database migrations, we attempt to create a default user. If this user
+  # already exists (e.g., from a previous deployment or migration), it's not a
+  # critical error and shouldn't prevent the application from starting. This is
+  # different from other migration errors which could indicate database schema
+  # inconsistencies and should cause the startup to fail. This check allows for
+  # smooth redeployments and container restarts while maintaining data integrity.
   echo "Running database migrations..."

   set +e  # Disable exit on error to handle specific migration errors
-  MIGRATION_OUTPUT=$(cd cognee && alembic upgrade head)
+  MIGRATION_OUTPUT=$(alembic upgrade head)
   MIGRATION_EXIT_CODE=$?
   set -e

   if [[ $MIGRATION_EXIT_CODE -ne 0 ]]; then
-    echo "Migration failed with unexpected error. Trying to run Cognee without migrations."
-    echo "Initializing database tables..."
-    python /app/src/run_cognee_database_setup.py
-    INIT_EXIT_CODE=$?
-
-    if [[ $INIT_EXIT_CODE -ne 0 ]]; then
-      echo "Database initialization failed!"
+    if [[ "$MIGRATION_OUTPUT" == *"UserAlreadyExists"* ]] || [[ "$MIGRATION_OUTPUT" == *"User default_user@example.com already exists"* ]]; then
+      echo "Warning: Default user already exists, continuing startup..."
+    else
+      echo "Migration failed with unexpected error."
       exit 1
     fi
   fi
@@ -8,6 +8,7 @@ requires-python = ">=3.10"
 dependencies = [
     # For local cognee repo usage remove comment bellow and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
     #"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/igorilic/Desktop/cognee",
+    # TODO: Remove gemini from optional dependecnies for new Cognee version after 0.3.4
    "cognee[postgres,docs,neo4j]==0.5.0",
    "fastmcp>=2.10.0,<3.0.0",
    "mcp>=1.12.0,<2.0.0",
@@ -151,7 +151,7 @@ class CogneeClient:
         query_type: str,
         datasets: Optional[List[str]] = None,
         system_prompt: Optional[str] = None,
-        top_k: int = 10,
+        top_k: int = 5,
     ) -> Any:
         """
         Search the knowledge graph.
@@ -1,5 +0,0 @@
-from cognee.modules.engine.operations.setup import setup
-import asyncio
-
-if __name__ == "__main__":
-    asyncio.run(setup())
@@ -8,7 +8,6 @@ from pathlib import Path
 from typing import Optional

 from cognee.shared.logging_utils import get_logger, setup_logging, get_log_file_location
-from cognee.shared.usage_logger import log_usage
 import importlib.util
 from contextlib import redirect_stdout
 import mcp.types as types
@@ -92,7 +91,6 @@ async def health_check(request):


 @mcp.tool()
-@log_usage(function_name="MCP cognify", log_type="mcp_tool")
 async def cognify(
     data: str, graph_model_file: str = None, graph_model_name: str = None, custom_prompt: str = None
 ) -> list:
@@ -259,7 +257,6 @@ async def cognify(
 @mcp.tool(
     name="save_interaction", description="Logs user-agent interactions and query-answer pairs"
 )
-@log_usage(function_name="MCP save_interaction", log_type="mcp_tool")
 async def save_interaction(data: str) -> list:
     """
     Transform and save a user-agent interaction into structured knowledge.
@@ -319,7 +316,6 @@ async def save_interaction(data: str) -> list:


 @mcp.tool()
-@log_usage(function_name="MCP search", log_type="mcp_tool")
 async def search(search_query: str, search_type: str, top_k: int = 10) -> list:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -500,7 +496,6 @@ async def search(search_query: str, search_type: str, top_k: int = 10) -> list:


 @mcp.tool()
-@log_usage(function_name="MCP list_data", log_type="mcp_tool")
 async def list_data(dataset_id: str = None) -> list:
     """
     List all datasets and their data items with IDs for deletion operations.
@@ -629,7 +624,6 @@ async def list_data(dataset_id: str = None) -> list:


 @mcp.tool()
-@log_usage(function_name="MCP delete", log_type="mcp_tool")
 async def delete(data_id: str, dataset_id: str, mode: str = "soft") -> list:
     """
     Delete specific data from a dataset in the Cognee knowledge graph.
@@ -709,7 +703,6 @@ async def delete(data_id: str, dataset_id: str, mode: str = "soft") -> list:


 @mcp.tool()
-@log_usage(function_name="MCP prune", log_type="mcp_tool")
 async def prune():
     """
     Reset the Cognee knowledge graph by removing all stored information.
@@ -746,7 +739,6 @@ async def prune():


 @mcp.tool()
-@log_usage(function_name="MCP cognify_status", log_type="mcp_tool")
 async def cognify_status():
     """
     Get the current status of the cognify pipeline.
@@ -892,11 +884,26 @@ async def main():

         await setup()

-        # Run Cognee migrations
+        # Run Alembic migrations from the main cognee directory where alembic.ini is located
         logger.info("Running database migrations...")
-        from cognee.run_migrations import run_migrations
+        migration_result = subprocess.run(
+            ["python", "-m", "alembic", "upgrade", "head"],
+            capture_output=True,
+            text=True,
+            cwd=Path(__file__).resolve().parent.parent.parent,
+        )

-        await run_migrations()
+        if migration_result.returncode != 0:
+            migration_output = migration_result.stderr + migration_result.stdout
+            # Check for the expected UserAlreadyExists error (which is not critical)
+            if (
+                "UserAlreadyExists" in migration_output
+                or "User default_user@example.com already exists" in migration_output
+            ):
+                logger.warning("Warning: Default user already exists, continuing startup...")
+            else:
+                logger.error(f"Migration failed with unexpected error: {migration_output}")
+                sys.exit(1)
+
         logger.info("Database migrations done.")
     elif args.api_url:
@@ -627,7 +627,8 @@ class TestModel:
     print(f"Failed: {failed}")
     print(f"Success Rate: {(passed / total_tests * 100):.1f}%")

-    assert failed == 0, f"\n ⚠️ {failed} test(s) failed - review results above for details"
+    if failed > 0:
+        print(f"\n ⚠️ {failed} test(s) failed - review results above for details")


 async def main():
cognee-mcp/uv.lock (generated): 4179 lines changed. File diff suppressed because it is too large.
@@ -1,15 +1,3 @@
-# ⚠️ DEPRECATED - Go to `new-examples/` Instead
-
-This starter kit is deprecated. Its examples have been integrated into the `/new-examples/` folder.
-
-| Old Location | New Location |
-|--------------|--------------|
-| `src/pipelines/default.py` | none |
-| `src/pipelines/low_level.py` | `new-examples/custom_pipelines/organizational_hierarchy/` |
-| `src/pipelines/custom-model.py` | `new-examples/demos/custom_graph_model_entity_schema_definition.py` |
-| `src/data/` | Included in `new-examples/custom_pipelines/organizational_hierarchy/data/` |
-
-----------
-
 # Cognee Starter Kit
 Welcome to the <a href="https://github.com/topoteretes/cognee">cognee</a> Starter Repo! This repository is designed to help you get started quickly by providing a structured dataset and pre-built data pipelines using cognee to build powerful knowledge graphs.
@@ -33,5 +33,3 @@ from .api.v1.ui import start_ui

 # Pipelines
 from .modules import pipelines
-
-from cognee.run_migrations import run_migrations
@@ -1 +0,0 @@
-Generic single-database configuration with an async dbapi.
@@ -1,52 +0,0 @@
-"""Enable delete for old tutorial notebooks
-
-Revision ID: 1a58b986e6e1
-Revises: 46a6ce2bd2b2
-Create Date: 2025-12-17 11:04:44.414259
-
-"""
-
-from typing import Sequence, Union
-
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision: str = "1a58b986e6e1"
-down_revision: Union[str, None] = "e1ec1dcb50b6"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def change_tutorial_deletable_flag(deletable: bool) -> None:
-    bind = op.get_bind()
-    inspector = sa.inspect(bind)
-
-    if "notebooks" not in inspector.get_table_names():
-        return
-
-    columns = {col["name"] for col in inspector.get_columns("notebooks")}
-    required_columns = {"name", "deletable"}
-    if not required_columns.issubset(columns):
-        return
-
-    notebooks = sa.table(
-        "notebooks",
-        sa.Column("name", sa.String()),
-        sa.Column("deletable", sa.Boolean()),
-    )
-
-    tutorial_name = "Python Development with Cognee Tutorial 🧠"
-
-    bind.execute(
-        notebooks.update().where(notebooks.c.name == tutorial_name).values(deletable=deletable)
-    )
-
-
-def upgrade() -> None:
-    change_tutorial_deletable_flag(True)
-
-
-def downgrade() -> None:
-    change_tutorial_deletable_flag(False)
@@ -1,38 +0,0 @@
-"""Add label column to data table
-
-Revision ID: a1b2c3d4e5f6
-Revises: 211ab850ef3d
-Create Date: 2025-11-17 17:54:32.123456
-
-"""
-
-from typing import Sequence, Union
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision: str = "a1b2c3d4e5f6"
-down_revision: Union[str, None] = "46a6ce2bd2b2"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def _get_column(inspector, table, name, schema=None):
-    for col in inspector.get_columns(table, schema=schema):
-        if col["name"] == name:
-            return col
-    return None
-
-
-def upgrade() -> None:
-    conn = op.get_bind()
-    insp = sa.inspect(conn)
-
-    label_column = _get_column(insp, "data", "label")
-    if not label_column:
-        op.add_column("data", sa.Column("label", sa.String(), nullable=True))
-
-
-def downgrade() -> None:
-    op.drop_column("data", "label")
@@ -1,51 +0,0 @@
-"""add_last_accessed_to_data
-
-Revision ID: e1ec1dcb50b6
-Revises: 211ab850ef3d
-Create Date: 2025-11-04 21:45:52.642322
-
-"""
-
-import os
-from typing import Sequence, Union
-
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision: str = "e1ec1dcb50b6"
-down_revision: Union[str, None] = "a1b2c3d4e5f6"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def _get_column(inspector, table, name, schema=None):
-    for col in inspector.get_columns(table, schema=schema):
-        if col["name"] == name:
-            return col
-    return None
-
-
-def upgrade() -> None:
-    conn = op.get_bind()
-    insp = sa.inspect(conn)
-
-    last_accessed_column = _get_column(insp, "data", "last_accessed")
-    if not last_accessed_column:
-        # Always create the column for schema consistency
-        op.add_column("data", sa.Column("last_accessed", sa.DateTime(timezone=True), nullable=True))
-
-    # Only initialize existing records if feature is enabled
-    enable_last_accessed = os.getenv("ENABLE_LAST_ACCESSED", "false").lower() == "true"
-    if enable_last_accessed:
-        op.execute("UPDATE data SET last_accessed = CURRENT_TIMESTAMP")
-
-
-def downgrade() -> None:
-    conn = op.get_bind()
-    insp = sa.inspect(conn)
-
-    last_accessed_column = _get_column(insp, "data", "last_accessed")
-    if last_accessed_column:
-        op.drop_column("data", "last_accessed")
@@ -3,7 +3,3 @@
 # CORS_ALLOWED_ORIGINS="https://yourdomain.com,https://another.com"
 # For local development, you might use:
 # CORS_ALLOWED_ORIGINS="http://localhost:3000"
-
-LLM_API_KEY="your-openai-api-key"
-LLM_MODEL="openai/gpt-4o-mini"
-LLM_PROVIDER="openai"
@@ -10,14 +10,13 @@ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
 )
 from cognee.modules.engine.operations.setup import setup
 from cognee.tasks.ingestion import ingest_data, resolve_data_directories
-from cognee.tasks.ingestion.data_item import DataItem
 from cognee.shared.logging_utils import get_logger

 logger = get_logger()


 async def add(
-    data: Union[BinaryIO, list[BinaryIO], str, list[str], DataItem, list[DataItem]],
+    data: Union[BinaryIO, list[BinaryIO], str, list[str]],
     dataset_name: str = "main_dataset",
     user: User = None,
     node_set: Optional[List[str]] = None,
@@ -10,7 +10,6 @@ from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
 from cognee.modules.pipelines.models import PipelineRunErrored
 from cognee.shared.logging_utils import get_logger
-from cognee.shared.usage_logger import log_usage
 from cognee import __version__ as cognee_version

 logger = get_logger()
@@ -20,7 +19,6 @@ def get_add_router() -> APIRouter:
     router = APIRouter()

     @router.post("", response_model=dict)
-    @log_usage(function_name="POST /v1/add", log_type="api_endpoint")
     async def add(
         data: List[UploadFile] = File(default=None),
         datasetName: Optional[str] = Form(default=None),
@@ -252,7 +252,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunk_size: int = None,
     config: Config = None,
     custom_prompt: Optional[str] = None,
-    chunks_per_batch: int = None,
+    chunks_per_batch: int = 100,
     **kwargs,
 ) -> list[Task]:
     if config is None:
@@ -272,14 +272,12 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
         "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
     }

+    if chunks_per_batch is None:
+        chunks_per_batch = 100
+
     cognify_config = get_cognify_config()
     embed_triplets = cognify_config.triplet_embedding

-    if chunks_per_batch is None:
-        chunks_per_batch = (
-            cognify_config.chunks_per_batch if cognify_config.chunks_per_batch is not None else 100
-        )
-
     default_tasks = [
         Task(classify_documents),
         Task(
@@ -310,7 +308,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's


 async def get_temporal_tasks(
-    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = None
+    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10
 ) -> list[Task]:
     """
     Builds and returns a list of temporal processing tasks to be executed in sequence.
@@ -332,10 +330,7 @@ async def get_temporal_tasks(
         list[Task]: A list of Task objects representing the temporal processing pipeline.
     """
     if chunks_per_batch is None:
-        from cognee.modules.cognify.config import get_cognify_config
-
-        configured = get_cognify_config().chunks_per_batch
-        chunks_per_batch = configured if configured is not None else 10
+        chunks_per_batch = 10

     temporal_tasks = [
         Task(classify_documents),
@@ -29,7 +29,6 @@ from cognee.modules.pipelines.queues.pipeline_run_info_queues import (
 )
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.utils import send_telemetry
-from cognee.shared.usage_logger import log_usage
 from cognee import __version__ as cognee_version

 logger = get_logger("api.cognify")
@@ -47,18 +46,12 @@ class CognifyPayloadDTO(InDTO):
         examples=[[]],
         description="Reference to one or more previously uploaded ontologies",
     )
-    chunks_per_batch: Optional[int] = Field(
-        default=None,
-        description="Number of chunks to process per task batch in Cognify (overrides default).",
-        examples=[10, 20, 50, 100],
-    )


 def get_cognify_router() -> APIRouter:
     router = APIRouter()

     @router.post("", response_model=dict)
-    @log_usage(function_name="POST /v1/cognify", log_type="api_endpoint")
     async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
         """
         Transform datasets into structured knowledge graphs through cognitive processing.
@@ -153,7 +146,6 @@ def get_cognify_router() -> APIRouter:
                 config=config_to_use,
                 run_in_background=payload.run_in_background,
                 custom_prompt=payload.custom_prompt,
-                chunks_per_batch=payload.chunks_per_batch,
             )

             # If any cognify run errored return JSONResponse with proper error status code
@@ -7,9 +7,7 @@ from fastapi import status
 from fastapi import APIRouter
 from fastapi.encoders import jsonable_encoder
 from fastapi import HTTPException, Query, Depends
-from fastapi.responses import JSONResponse, FileResponse, StreamingResponse
-from urllib.parse import urlparse
-from pathlib import Path
+from fastapi.responses import JSONResponse, FileResponse

 from cognee.api.DTO import InDTO, OutDTO
 from cognee.infrastructure.databases.relational import get_relational_engine
@@ -46,7 +44,6 @@ class DatasetDTO(OutDTO):
 class DataDTO(OutDTO):
     id: UUID
     name: str
-    label: Optional[str] = None
     created_at: datetime
     updated_at: Optional[datetime] = None
     extension: str
@ -478,40 +475,6 @@ def get_datasets_router() -> APIRouter:
|
|||
message=f"Data ({data_id}) not found in dataset ({dataset_id})."
|
||||
)
|
||||
|
||||
raw_location = data.raw_data_location
|
||||
|
||||
if raw_location.startswith("file://"):
|
||||
from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
|
||||
|
||||
raw_location = get_data_file_path(raw_location)
|
||||
|
||||
if raw_location.startswith("s3://"):
|
||||
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||
from cognee.infrastructure.utils.run_async import run_async
|
||||
|
||||
parsed = urlparse(raw_location)
|
||||
download_name = Path(parsed.path).name or data.name
|
||||
media_type = data.mime_type or "application/octet-stream"
|
||||
|
||||
async def file_iterator(chunk_size: int = 1024 * 1024):
|
||||
async with open_data_file(raw_location, mode="rb") as file:
|
||||
while True:
|
||||
chunk = await run_async(file.read, chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(
|
||||
file_iterator(),
|
||||
media_type=media_type,
|
||||
headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
|
||||
)
|
||||
|
||||
path = Path(raw_location)
|
||||
|
||||
if not path.is_file():
|
||||
raise DataNotFoundError(message=f"Raw file not found on disk for data ({data_id}).")
|
||||
|
||||
return FileResponse(path=path)
|
||||
return data.raw_data_location
|
||||
|
||||
return router
|
||||
|
|
|
|||
|
|
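The s3:// branch above streams the raw file in 1 MiB chunks instead of buffering it in memory. A hypothetical client-side counterpart, with the route path and host assumed, would stream the body to disk the same way:

import httpx

url = "http://localhost:8000/api/v1/datasets/<dataset_id>/data/<data_id>/raw"  # path assumed

with httpx.stream("GET", url) as response:
    response.raise_for_status()
    with open("downloaded_file", "wb") as out:
        # Mirror the server's 1 MiB chunking so memory use stays constant
        for chunk in response.iter_bytes(chunk_size=1024 * 1024):
            out.write(chunk)
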
@@ -12,7 +12,6 @@ from cognee.modules.users.methods import get_authenticated_user
from cognee.shared.utils import send_telemetry
from cognee.modules.pipelines.models import PipelineRunErrored
from cognee.shared.logging_utils import get_logger
from cognee.shared.usage_logger import log_usage
from cognee import __version__ as cognee_version

logger = get_logger()
@@ -36,7 +35,6 @@ def get_memify_router() -> APIRouter:
    router = APIRouter()

    @router.post("", response_model=dict)
    @log_usage(function_name="POST /v1/memify", log_type="api_endpoint")
    async def memify(payload: MemifyPayloadDTO, user: User = Depends(get_authenticated_user)):
        """
        Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data,
@@ -92,7 +90,6 @@ def get_memify_router() -> APIRouter:
                dataset=payload.dataset_id if payload.dataset_id else payload.dataset_name,
                node_name=payload.node_name,
                user=user,
                run_in_background=payload.run_in_background,
            )

            if isinstance(memify_run, PipelineRunErrored):

@@ -6,17 +6,14 @@ from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder

from cognee.modules.search.types import SearchType, SearchResult
from cognee.modules.search.types import SearchType, SearchResult, CombinedSearchResult
from cognee.api.DTO import InDTO, OutDTO
from cognee.modules.users.exceptions.exceptions import PermissionDeniedError, UserNotFoundError
from cognee.modules.users.exceptions.exceptions import PermissionDeniedError
from cognee.modules.users.models import User
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_authenticated_user
from cognee.shared.utils import send_telemetry
from cognee.shared.usage_logger import log_usage
from cognee import __version__ as cognee_version
from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
from cognee.exceptions import CogneeValidationError


# Note: Datasets sent by name will only map to datasets owned by the request sender
@@ -32,7 +29,7 @@ class SearchPayloadDTO(InDTO):
    node_name: Optional[list[str]] = Field(default=None, example=[])
    top_k: Optional[int] = Field(default=10)
    only_context: bool = Field(default=False)
    verbose: bool = Field(default=False)
    use_combined_context: bool = Field(default=False)


def get_search_router() -> APIRouter:
@@ -75,8 +72,7 @@ def get_search_router() -> APIRouter:
        except Exception as error:
            return JSONResponse(status_code=500, content={"error": str(error)})

    @router.post("", response_model=Union[List[SearchResult], List])
    @log_usage(function_name="POST /v1/search", log_type="api_endpoint")
    @router.post("", response_model=Union[List[SearchResult], CombinedSearchResult, List])
    async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
        """
        Search for nodes in the graph database.
@@ -120,7 +116,7 @@
                "node_name": payload.node_name,
                "top_k": payload.top_k,
                "only_context": payload.only_context,
                "verbose": payload.verbose,
                "use_combined_context": payload.use_combined_context,
                "cognee_version": cognee_version,
            },
        )
@@ -137,22 +133,11 @@
                system_prompt=payload.system_prompt,
                node_name=payload.node_name,
                top_k=payload.top_k,
                verbose=payload.verbose,
                only_context=payload.only_context,
                use_combined_context=payload.use_combined_context,
            )

            return jsonable_encoder(results)
        except (DatabaseNotCreatedError, UserNotFoundError, CogneeValidationError) as e:
            # Return a clear 422 with actionable guidance instead of leaking a stacktrace
            status_code = getattr(e, "status_code", 422)
            return JSONResponse(
                status_code=status_code,
                content={
                    "error": "Search prerequisites not met",
                    "detail": str(e),
                    "hint": "Run `await cognee.add(...)` then `await cognee.cognify()` before searching.",
                },
            )
        except PermissionDeniedError:
            return []
        except Exception as error:

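A minimal sketch of a request against the search endpoint above, assuming a local server with auth already handled. The payload fields mirror SearchPayloadDTO as shown; the "query" and "search_type" names and the SearchType value are assumptions, since this hunk does not show them:

import httpx

payload = {
    "search_type": "GRAPH_COMPLETION",  # assumed SearchType value
    "query": "What is in my dataset?",  # assumed field name
    "top_k": 10,
    "only_context": False,
    "verbose": False,
    "use_combined_context": True,       # opts into the CombinedSearchResult response shape
}

response = httpx.post("http://localhost:8000/api/v1/search", json=payload)  # URL assumed
print(response.json())
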
@@ -4,16 +4,13 @@ from typing import Union, Optional, List, Type
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.engine.models.node_set import NodeSet
from cognee.modules.users.models import User
from cognee.modules.search.types import SearchResult, SearchType
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
from cognee.modules.users.methods import get_default_user
from cognee.modules.search.methods import search as search_function
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.exceptions import DatasetNotFoundError
from cognee.context_global_variables import set_session_user_context_variable
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
from cognee.exceptions import CogneeValidationError
from cognee.modules.users.exceptions.exceptions import UserNotFoundError

logger = get_logger()


@@ -32,11 +29,12 @@ async def search(
    save_interaction: bool = False,
    last_k: Optional[int] = 1,
    only_context: bool = False,
    use_combined_context: bool = False,
    session_id: Optional[str] = None,
    wide_search_top_k: Optional[int] = 100,
    triplet_distance_penalty: Optional[float] = 3.5,
    verbose: bool = False,
) -> List[SearchResult]:
) -> Union[List[SearchResult], CombinedSearchResult]:
    """
    Search and query the knowledge graph for insights, information, and connections.

@@ -181,18 +179,7 @@ async def search(
        datasets = [datasets]

    if user is None:
        try:
            user = await get_default_user()
        except (DatabaseNotCreatedError, UserNotFoundError) as error:
            # Provide a clear, actionable message instead of surfacing low-level stacktraces
            raise CogneeValidationError(
                message=(
                    "Search prerequisites not met: no database/default user found. "
                    "Initialize Cognee before searching by:\n"
                    "• running `await cognee.add(...)` followed by `await cognee.cognify()`."
                ),
                name="SearchPreconditionError",
            ) from error
        user = await get_default_user()

    await set_session_user_context_variable(user)

@@ -216,6 +203,7 @@ async def search(
        save_interaction=save_interaction,
        last_k=last_k,
        only_context=only_context,
        use_combined_context=use_combined_context,
        session_id=session_id,
        wide_search_top_k=wide_search_top_k,
        triplet_distance_penalty=triplet_distance_penalty,

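The SearchPreconditionError above points users to the canonical first-run sequence. As a sketch, the flow it recommends looks like this; the top-level cognee calls are taken from the error hint itself, while the exact search() keyword is an assumption:

import asyncio
import cognee

async def main():
    # add() ingests data and cognify() builds the graph; only then can search() run
    await cognee.add("Cognee turns documents into a knowledge graph.")
    await cognee.cognify()
    results = await cognee.search(query_text="What does Cognee do?")  # keyword assumed
    print(results)

asyncio.run(main())
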
@@ -62,11 +62,6 @@ After successful cognify processing, use `cognee search` to query the knowledge
        parser.add_argument(
            "--verbose", "-v", action="store_true", help="Show detailed progress information"
        )
        parser.add_argument(
            "--chunks-per-batch",
            type=int,
            help="Number of chunks to process per task batch (try 50 for large single documents).",
        )

    def execute(self, args: argparse.Namespace) -> None:
        try:
@@ -116,7 +111,6 @@ After successful cognify processing, use `cognee search` to query the knowledge
                chunk_size=args.chunk_size,
                ontology_file_path=args.ontology_file,
                run_in_background=args.background,
                chunks_per_batch=getattr(args, "chunks_per_batch", None),
            )
            return result
        except Exception as e:

@@ -121,16 +121,13 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
    )

    # Set vector and graph database configuration based on dataset database information
    # TODO: Add better handling of vector and graph config across Cognee.
    # TODO: Add better handling of vector and graph config accross Cognee.
    # LRU_CACHE takes into account order of inputs, if order of inputs is changed it will be registered as a new DB adapter
    vector_config = {
        "vector_db_provider": dataset_database.vector_database_provider,
        "vector_db_url": dataset_database.vector_database_url,
        "vector_db_key": dataset_database.vector_database_key,
        "vector_db_name": dataset_database.vector_database_name,
        "vector_db_port": dataset_database.vector_database_connection_info.get("port", ""),
        "vector_db_username": dataset_database.vector_database_connection_info.get("username", ""),
        "vector_db_password": dataset_database.vector_database_connection_info.get("password", ""),
    }

    graph_config = {

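The LRU_CACHE comment above is worth unpacking: functools.lru_cache builds its cache key from the call exactly as written, so passing the same values as keyword arguments in a different order creates a second cache entry, and in this codebase a second DB adapter. A standalone illustration:

from functools import lru_cache

@lru_cache
def make_adapter(url="", name=""):
    return object()  # stand-in for an expensive adapter construction

a = make_adapter(url="db://x", name="main")
b = make_adapter(name="main", url="db://x")  # same values, different kwarg order
print(a is b)  # False: lru_cache keyed the two calls differently
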
@@ -8,13 +8,10 @@ class CacheDBInterface(ABC):
    Provides a common interface for lock acquisition, release, and context-managed locking.
    """

    def __init__(
        self, host: str, port: int, lock_key: str = "default_lock", log_key: str = "usage_logs"
    ):
    def __init__(self, host: str, port: int, lock_key: str):
        self.host = host
        self.port = port
        self.lock_key = lock_key
        self.log_key = log_key
        self.lock = None

    @abstractmethod
@@ -80,37 +77,3 @@ class CacheDBInterface(ABC):
        Gracefully close any async connections.
        """
        pass

    @abstractmethod
    async def log_usage(
        self,
        user_id: str,
        log_entry: dict,
        ttl: int | None = 604800,
    ):
        """
        Log usage information (API endpoint calls, MCP tool invocations) to cache.

        Args:
            user_id: The user ID.
            log_entry: Dictionary containing usage log information.
            ttl: Optional time-to-live (seconds). If provided, the log list expires after this time.

        Raises:
            CacheConnectionError: If cache connection fails or times out.
        """
        pass

    @abstractmethod
    async def get_usage_logs(self, user_id: str, limit: int = 100):
        """
        Retrieve usage logs for a given user.

        Args:
            user_id: The user ID.
            limit: Maximum number of logs to retrieve (default: 100).

        Returns:
            List of usage log entries, most recent first.
        """
        pass

@@ -13,8 +13,6 @@ class CacheConfig(BaseSettings):
    - cache_port: Port number for the cache service.
    - agentic_lock_expire: Automatic lock expiration time (in seconds).
    - agentic_lock_timeout: Maximum time (in seconds) to wait for the lock release.
    - usage_logging: Enable/disable usage logging for API endpoints and MCP tools.
    - usage_logging_ttl: Time-to-live for usage logs in seconds (default: 7 days).
    """

    cache_backend: Literal["redis", "fs"] = "fs"
@@ -26,8 +24,6 @@
    cache_password: Optional[str] = None
    agentic_lock_expire: int = 240
    agentic_lock_timeout: int = 300
    usage_logging: bool = False
    usage_logging_ttl: int = 604800

    model_config = SettingsConfigDict(env_file=".env", extra="allow")

@@ -42,8 +38,6 @@
            "cache_password": self.cache_password,
            "agentic_lock_expire": self.agentic_lock_expire,
            "agentic_lock_timeout": self.agentic_lock_timeout,
            "usage_logging": self.usage_logging,
            "usage_logging_ttl": self.usage_logging_ttl,
        }

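Side note on the magic number above: the usage_logging_ttl default of 604800 seconds is exactly the seven days the docstring mentions:

seconds_per_day = 24 * 60 * 60
print(7 * seconds_per_day)  # 604800, the usage_logging_ttl default
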
@@ -89,27 +89,6 @@ class FSCacheAdapter(CacheDBInterface):
            return None
        return json.loads(value)

    async def log_usage(
        self,
        user_id: str,
        log_entry: dict,
        ttl: int | None = 604800,
    ):
        """
        Usage logging is not supported in filesystem cache backend.
        This method is a no-op to satisfy the interface.
        """
        logger.warning("Usage logging not supported in FSCacheAdapter, skipping")
        pass

    async def get_usage_logs(self, user_id: str, limit: int = 100):
        """
        Usage logging is not supported in filesystem cache backend.
        This method returns an empty list to satisfy the interface.
        """
        logger.warning("Usage logging not supported in FSCacheAdapter, returning empty list")
        return []

    async def close(self):
        if self.cache is not None:
            self.cache.expire()

@@ -1,6 +1,7 @@
"""Factory to get the appropriate cache coordination engine (e.g., Redis)."""

from functools import lru_cache
import os
from typing import Optional
from cognee.infrastructure.databases.cache.config import get_cache_config
from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
@@ -16,7 +17,6 @@ def create_cache_engine(
    cache_username: str,
    cache_password: str,
    lock_key: str,
    log_key: str,
    agentic_lock_expire: int = 240,
    agentic_lock_timeout: int = 300,
):
@@ -30,7 +30,6 @@
    - cache_username: Username to authenticate with.
    - cache_password: Password to authenticate with.
    - lock_key: Identifier used for the locking resource.
    - log_key: Identifier used for usage logging.
    - agentic_lock_expire: Duration to hold the lock after acquisition.
    - agentic_lock_timeout: Max time to wait for the lock before failing.

@@ -38,7 +37,7 @@
    --------
    - CacheDBInterface: An instance of the appropriate cache adapter.
    """
    if config.caching or config.usage_logging:
    if config.caching:
        from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter

        if config.cache_backend == "redis":
@@ -48,7 +47,6 @@
                username=cache_username,
                password=cache_password,
                lock_name=lock_key,
                log_key=log_key,
                timeout=agentic_lock_expire,
                blocking_timeout=agentic_lock_timeout,
            )
@@ -63,10 +61,7 @@
    return None


def get_cache_engine(
    lock_key: Optional[str] = "default_lock",
    log_key: Optional[str] = "usage_logs",
) -> Optional[CacheDBInterface]:
def get_cache_engine(lock_key: Optional[str] = None) -> CacheDBInterface:
    """
    Returns a cache adapter instance using current context configuration.
    """
@@ -77,7 +72,6 @@ def get_cache_engine(
        cache_username=config.cache_username,
        cache_password=config.cache_password,
        lock_key=lock_key,
        log_key=log_key,
        agentic_lock_expire=config.agentic_lock_expire,
        agentic_lock_timeout=config.agentic_lock_timeout,
    )

@@ -17,14 +17,13 @@ class RedisAdapter(CacheDBInterface):
        host,
        port,
        lock_name="default_lock",
        log_key="usage_logs",
        username=None,
        password=None,
        timeout=240,
        blocking_timeout=300,
        connection_timeout=30,
    ):
        super().__init__(host, port, lock_name, log_key)
        super().__init__(host, port, lock_name)

        self.host = host
        self.port = port
@@ -178,64 +177,6 @@
        entries = await self.async_redis.lrange(session_key, 0, -1)
        return [json.loads(e) for e in entries]

    async def log_usage(
        self,
        user_id: str,
        log_entry: dict,
        ttl: int | None = 604800,
    ):
        """
        Log usage information (API endpoint calls, MCP tool invocations) to Redis.

        Args:
            user_id: The user ID.
            log_entry: Dictionary containing usage log information.
            ttl: Optional time-to-live (seconds). If provided, the log list expires after this time.

        Raises:
            CacheConnectionError: If Redis connection fails or times out.
        """
        try:
            usage_logs_key = f"{self.log_key}:{user_id}"

            await self.async_redis.rpush(usage_logs_key, json.dumps(log_entry))

            if ttl is not None:
                await self.async_redis.expire(usage_logs_key, ttl)

        except (redis.ConnectionError, redis.TimeoutError) as e:
            error_msg = f"Redis connection error while logging usage: {str(e)}"
            logger.error(error_msg)
            raise CacheConnectionError(error_msg) from e
        except Exception as e:
            error_msg = f"Unexpected error while logging usage to Redis: {str(e)}"
            logger.error(error_msg)
            raise CacheConnectionError(error_msg) from e

    async def get_usage_logs(self, user_id: str, limit: int = 100):
        """
        Retrieve usage logs for a given user.

        Args:
            user_id: The user ID.
            limit: Maximum number of logs to retrieve (default: 100).

        Returns:
            List of usage log entries, most recent first.
        """
        try:
            usage_logs_key = f"{self.log_key}:{user_id}"
            entries = await self.async_redis.lrange(usage_logs_key, -limit, -1)
            return [json.loads(e) for e in reversed(entries)] if entries else []
        except (redis.ConnectionError, redis.TimeoutError) as e:
            error_msg = f"Redis connection error while retrieving usage logs: {str(e)}"
            logger.error(error_msg)
            raise CacheConnectionError(error_msg) from e
        except Exception as e:
            error_msg = f"Unexpected error while retrieving usage logs from Redis: {str(e)}"
            logger.error(error_msg)
            raise CacheConnectionError(error_msg) from e

    async def close(self):
        """
        Gracefully close the async Redis connection.

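Why get_usage_logs above combines lrange(key, -limit, -1) with reversed(): RPUSH appends at the tail, so negative indices address the newest entries, and reversing the returned slice yields a most-recent-first list. The same semantics modeled with a plain Python list:

logs = ["e1", "e2", "e3", "e4"]   # oldest ... newest, in RPUSH order
limit = 3
tail = logs[-limit:]              # what LRANGE key -3 -1 returns
print(list(reversed(tail)))       # ['e4', 'e3', 'e2'] (most recent first)
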
@@ -7,9 +7,6 @@ from cognee.infrastructure.databases.vector.lancedb.LanceDBDatasetDatabaseHandle
from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler import (
    KuzuDatasetDatabaseHandler,
)
from cognee.infrastructure.databases.vector.pgvector.PGVectorDatasetDatabaseHandler import (
    PGVectorDatasetDatabaseHandler,
)

supported_dataset_database_handlers = {
    "neo4j_aura_dev": {
@@ -17,9 +14,5 @@ supported_dataset_database_handlers = {
        "handler_provider": "neo4j",
    },
    "lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"},
    "pgvector": {
        "handler_instance": PGVectorDatasetDatabaseHandler,
        "handler_provider": "pgvector",
    },
    "kuzu": {"handler_instance": KuzuDatasetDatabaseHandler, "handler_provider": "kuzu"},
}

@@ -24,36 +24,8 @@ async def get_graph_engine() -> GraphDBInterface:
    return graph_client


def create_graph_engine(
    graph_database_provider,
    graph_file_path,
    graph_database_url="",
    graph_database_name="",
    graph_database_username="",
    graph_database_password="",
    graph_database_port="",
    graph_database_key="",
    graph_dataset_database_handler="",
):
    """
    Wrapper function to call create graph engine with caching.
    For a detailed description, see _create_graph_engine.
    """
    return _create_graph_engine(
        graph_database_provider,
        graph_file_path,
        graph_database_url,
        graph_database_name,
        graph_database_username,
        graph_database_password,
        graph_database_port,
        graph_database_key,
        graph_dataset_database_handler,
    )


@lru_cache
def _create_graph_engine(
def create_graph_engine(
    graph_database_provider,
    graph_file_path,
    graph_database_url="",

@@ -1,13 +1,11 @@
import os
import aiohttp
import asyncio
import requests
import base64
import hashlib
from uuid import UUID
from typing import Optional
from urllib.parse import urlparse
from cryptography.fernet import Fernet
from aiohttp import BasicAuth

from cognee.infrastructure.databases.graph import get_graph_config
from cognee.modules.users.models import User, DatasetDatabase
@@ -25,6 +23,7 @@ class Neo4jAuraDevDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):

    Quality of life improvements:
    - Allow configuration of different Neo4j Aura plans and regions.
    - Requests should be made async, currently a blocking requests library is used.
    """

    @classmethod
@@ -50,7 +49,6 @@
        graph_db_name = f"{dataset_id}"

        # Client credentials and encryption
        # Note: Should not be used as class variables so that they are not persisted in memory longer than needed
        client_id = os.environ.get("NEO4J_CLIENT_ID", None)
        client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None)
        tenant_id = os.environ.get("NEO4J_TENANT_ID", None)
@@ -65,13 +63,22 @@
                "NEO4J_CLIENT_ID, NEO4J_CLIENT_SECRET, and NEO4J_TENANT_ID environment variables must be set to use Neo4j Aura DatasetDatabase Handling."
            )

        resp_token = await cls._get_aura_token(client_id, client_secret)
        # Make the request with HTTP Basic Auth
        def get_aura_token(client_id: str, client_secret: str) -> dict:
            url = "https://api.neo4j.io/oauth/token"
            data = {"grant_type": "client_credentials"}  # sent as application/x-www-form-urlencoded

            resp = requests.post(url, data=data, auth=(client_id, client_secret))
            resp.raise_for_status()  # raises if the request failed
            return resp.json()

        resp = get_aura_token(client_id, client_secret)

        url = "https://api.neo4j.io/v1/instances"

        headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {resp_token['access_token']}",
            "Authorization": f"Bearer {resp['access_token']}",
            "Content-Type": "application/json",
        }

@@ -89,38 +96,31 @@
            "cloud_provider": "gcp",
        }

        async def _create_database_instance_request():
            async with aiohttp.ClientSession() as session:
                async with session.post(url, headers=headers, json=payload) as resp:
                    resp.raise_for_status()
                    return await resp.json()

        resp_create = await _create_database_instance_request()
        response = requests.post(url, headers=headers, json=payload)

        graph_db_name = "neo4j"  # Has to be 'neo4j' for Aura
        graph_db_url = resp_create["data"]["connection_url"]
        graph_db_key = resp_token["access_token"]
        graph_db_username = resp_create["data"]["username"]
        graph_db_password = resp_create["data"]["password"]
        graph_db_url = response.json()["data"]["connection_url"]
        graph_db_key = resp["access_token"]
        graph_db_username = response.json()["data"]["username"]
        graph_db_password = response.json()["data"]["password"]

        async def _wait_for_neo4j_instance_provisioning(instance_id: str, headers: dict):
            # Poll until the instance is running
            status_url = f"https://api.neo4j.io/v1/instances/{instance_id}"
            status = ""
            for attempt in range(30):  # Try for up to ~5 minutes
                async with aiohttp.ClientSession() as session:
                    async with session.get(status_url, headers=headers) as resp:
                        resp.raise_for_status()
                        status_resp = await resp.json()
                status = status_resp["data"]["status"]
                if status.lower() == "running":
                    return
                await asyncio.sleep(10)
                status_resp = requests.get(
                    status_url, headers=headers
                )  # TODO: Use async requests with httpx
                status = status_resp.json()["data"]["status"]
                if status.lower() == "running":
                    return
                await asyncio.sleep(10)
            raise TimeoutError(
                f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. Status: {status}"
            )

        instance_id = resp_create["data"]["id"]
        instance_id = response.json()["data"]["id"]
        await _wait_for_neo4j_instance_provisioning(instance_id, headers)

        encrypted_db_password_bytes = cipher.encrypt(graph_db_password.encode())
@@ -165,39 +165,4 @@

    @classmethod
    async def delete_dataset(cls, dataset_database: DatasetDatabase):
        # Get dataset database information and credentials
        dataset_database = await cls.resolve_dataset_connection_info(dataset_database)

        parsed_url = urlparse(dataset_database.graph_database_url)
        instance_id = parsed_url.hostname.split(".")[0]

        url = f"https://api.neo4j.io/v1/instances/{instance_id}"

        # Get access token for Neo4j Aura API
        # Client credentials
        client_id = os.environ.get("NEO4J_CLIENT_ID", None)
        client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None)
        resp = await cls._get_aura_token(client_id, client_secret)

        headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {resp['access_token']}",
            "Content-Type": "application/json",
        }

        async with aiohttp.ClientSession() as session:
            async with session.delete(url, headers=headers) as resp:
                resp.raise_for_status()
                return await resp.json()

    @classmethod
    async def _get_aura_token(cls, client_id: str, client_secret: str) -> dict:
        url = "https://api.neo4j.io/oauth/token"
        data = {"grant_type": "client_credentials"}  # sent as application/x-www-form-urlencoded

        async with aiohttp.ClientSession() as session:
            async with session.post(
                url, data=data, auth=BasicAuth(client_id, client_secret)
            ) as resp:
                resp.raise_for_status()
                return await resp.json()
        pass

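How the delete_dataset path above derives the Aura instance id: the id is the first DNS label of the connection-URL hostname. A small check with an example-shaped URL (the id shown is made up):

from urllib.parse import urlparse

url = "neo4j+s://a1b2c3d4.databases.neo4j.io"  # example Aura-style connection URL
print(urlparse(url).hostname.split(".")[0])    # a1b2c3d4
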
@@ -236,7 +236,6 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
        query_vector: Optional[List[float]] = None,
        limit: Optional[int] = None,
        with_vector: bool = False,
        include_payload: bool = False,  # TODO: Add support for this parameter
    ):
        """
        Perform a search in the specified collection using either a text query or a vector
@@ -320,12 +319,7 @@
            self._na_exception_handler(e, query_string)

    async def batch_search(
        self,
        collection_name: str,
        query_texts: List[str],
        limit: int,
        with_vectors: bool = False,
        include_payload: bool = False,
        self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False
    ):
        """
        Perform a batch search using multiple text queries against a collection.
@@ -348,14 +342,7 @@
        data_vectors = await self.embedding_engine.embed_text(query_texts)
        return await asyncio.gather(
            *[
                self.search(
                    collection_name,
                    None,
                    vector,
                    limit,
                    with_vectors,
                    include_payload=include_payload,
                )
                self.search(collection_name, None, vector, limit, with_vectors)
                for vector in data_vectors
            ]
        )

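batch_search above embeds all queries once and then fans out one search per vector with asyncio.gather; a minimal model of that concurrency pattern:

import asyncio

async def search_one(vector):
    await asyncio.sleep(0)  # stand-in for one vector-store query
    return f"results-for-{vector}"

async def main():
    vectors = ["v1", "v2", "v3"]  # stand-ins for embedded query vectors
    results = await asyncio.gather(*[search_one(v) for v in vectors])
    print(results)

asyncio.run(main())
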
@@ -1,5 +1,4 @@
import os
import json
import pydantic
from typing import Union
from functools import lru_cache
@@ -20,7 +19,6 @@ class RelationalConfig(BaseSettings):
    db_username: Union[str, None] = None  # "cognee"
    db_password: Union[str, None] = None  # "cognee"
    db_provider: str = "sqlite"
    database_connect_args: Union[str, None] = None

    model_config = SettingsConfigDict(env_file=".env", extra="allow")

@@ -32,17 +30,6 @@
        databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
        self.db_path = databases_directory_path

        # Parse database_connect_args if provided as JSON string
        if self.database_connect_args and isinstance(self.database_connect_args, str):
            try:
                parsed_args = json.loads(self.database_connect_args)
                if isinstance(parsed_args, dict):
                    self.database_connect_args = parsed_args
                else:
                    self.database_connect_args = {}
            except json.JSONDecodeError:
                self.database_connect_args = {}

        return self

    def to_dict(self) -> dict:
@@ -53,8 +40,7 @@
        --------

        - dict: A dictionary containing database configuration settings including db_path,
          db_name, db_host, db_port, db_username, db_password, db_provider, and
          database_connect_args.
          db_name, db_host, db_port, db_username, db_password, and db_provider.
        """
        return {
            "db_path": self.db_path,
@@ -64,7 +50,6 @@
            "db_username": self.db_username,
            "db_password": self.db_password,
            "db_provider": self.db_provider,
            "database_connect_args": self.database_connect_args,
        }

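The validator above accepts DATABASE_CONNECT_ARGS only as a JSON object and silently falls back to an empty dict otherwise; the same logic in isolation:

import json

def parse_connect_args(raw: str) -> dict:
    try:
        parsed = json.loads(raw)
        return parsed if isinstance(parsed, dict) else {}
    except json.JSONDecodeError:
        return {}

print(parse_connect_args('{"sslmode": "require", "connect_timeout": 10}'))  # kept as-is
print(parse_connect_args('[1, 2, 3]'))  # valid JSON but not an object -> {}
print(parse_connect_args('not json'))   # invalid JSON -> {}
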
@@ -1,4 +1,3 @@
from sqlalchemy import URL
from .sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
from functools import lru_cache

@@ -12,7 +11,6 @@ def create_relational_engine(
    db_username: str,
    db_password: str,
    db_provider: str,
    database_connect_args: dict = None,
):
    """
    Create a relational database engine based on the specified parameters.
@@ -31,7 +29,6 @@
    - db_password (str): The password for database authentication, required for
      PostgreSQL.
    - db_provider (str): The type of database provider (e.g., 'sqlite' or 'postgres').
    - database_connect_args (dict, optional): Database driver connection arguments.

    Returns:
    --------
@@ -46,19 +43,12 @@
            # Test if asyncpg is available
            import asyncpg

            # Handle special characters in username and password like # or @
            connection_string = URL.create(
                "postgresql+asyncpg",
                username=db_username,
                password=db_password,
                host=db_host,
                port=int(db_port),
                database=db_name,
            connection_string = (
                f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
            )

        except ImportError:
            raise ImportError(
                "PostgreSQL dependencies are not installed. Please install with 'pip install cognee\"[postgres]\"' or 'pip install cognee\"[postgres-binary]\"' to use PostgreSQL functionality."
            )

    return SQLAlchemyAdapter(connection_string, connect_args=database_connect_args)
    return SQLAlchemyAdapter(connection_string)

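The comment about special characters explains why the URL.create variant above is safer than the plain f-string: SQLAlchemy percent-encodes credentials when rendering a URL object, so characters like '@' or '#' in a password cannot corrupt the host or database portion. A quick demonstration (the credential values are made up):

from sqlalchemy import URL

url = URL.create(
    "postgresql+asyncpg",
    username="cognee",
    password="p@ss#word",  # would split the host portion in a raw f-string URL
    host="localhost",
    port=5432,
    database="cognee_db",
)
print(url.render_as_string(hide_password=False))
# postgresql+asyncpg://cognee:p%40ss%23word@localhost:5432/cognee_db
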
@@ -8,7 +8,7 @@ from typing import AsyncGenerator, List
from contextlib import asynccontextmanager
from sqlalchemy.orm import joinedload
from sqlalchemy.exc import NoResultFound
from sqlalchemy import NullPool, text, select, MetaData, Table, delete, inspect, URL
from sqlalchemy import NullPool, text, select, MetaData, Table, delete, inspect
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker

from cognee.modules.data.models.Data import Data
@@ -29,31 +29,10 @@ class SQLAlchemyAdapter:
    functions.
    """

    def __init__(self, connection_string: str, connect_args: dict = None):
        """
        Initialize the SQLAlchemy adapter with connection settings.

        Parameters:
        -----------
        connection_string (str): The database connection string (e.g., 'sqlite:///path/to/db'
            or 'postgresql://user:pass@host:port/db').
        connect_args (dict, optional): Database driver connection arguments.
            Configuration is loaded from RelationalConfig.database_connect_args, which reads
            from the DATABASE_CONNECT_ARGS environment variable.

        Examples:
            PostgreSQL with SSL:
                DATABASE_CONNECT_ARGS='{"sslmode": "require", "connect_timeout": 10}'

            SQLite with custom timeout:
                DATABASE_CONNECT_ARGS='{"timeout": 60}'
        """
    def __init__(self, connection_string: str):
        self.db_path: str = None
        self.db_uri: str = connection_string

        # Use provided connect_args (already parsed from config)
        final_connect_args = connect_args or {}

        if "sqlite" in connection_string:
            [prefix, db_path] = connection_string.split("///")
            self.db_path = db_path
@@ -74,7 +53,7 @@
            self.engine = create_async_engine(
                connection_string,
                poolclass=NullPool,
                connect_args={**{"timeout": 30}, **final_connect_args},
                connect_args={"timeout": 30},
            )
        else:
            self.engine = create_async_engine(
@@ -84,30 +63,8 @@
                pool_recycle=280,
                pool_pre_ping=True,
                pool_timeout=280,
                connect_args=final_connect_args,
            )

        from cognee.context_global_variables import backend_access_control_enabled

        if backend_access_control_enabled():
            from cognee.infrastructure.databases.vector.config import get_vectordb_config

            vector_config = get_vectordb_config()
            if vector_config.vector_db_provider == "pgvector":
                # Create a maintenance engine, used when creating new postgres databases.
                # Database named "postgres" should always exist. We need this since the SQLAlchemy
                # engine cannot directly execute queries without first connecting to a database.
                maintenance_db_name = "postgres"
                maintenance_db_url = URL.create(
                    "postgresql+asyncpg",
                    username=vector_config.vector_db_username,
                    password=vector_config.vector_db_password,
                    host=vector_config.vector_db_host,
                    port=int(vector_config.vector_db_port),
                    database=maintenance_db_name,
                )
                self.maintenance_engine = create_async_engine(maintenance_db_url)

        self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)

    async def push_to_s3(self) -> None:
@@ -538,32 +495,9 @@
        if not await file_storage.file_exists(db_name):
            await file_storage.ensure_directory_exists()

        from cognee.infrastructure.databases.relational.config import get_relational_config

        relational_config = get_relational_config()

        if self.engine.dialect.name == "sqlite" or (
            self.engine.dialect.name == "postgresql"
            and relational_config.db_provider == "postgres"
            and self.engine.url.database == relational_config.db_name
        ):
            # In this case we already have a relational db created in sqlite or postgres, we just need to populate it
            async with self.engine.begin() as connection:
                if len(Base.metadata.tables.keys()) > 0:
                    await connection.run_sync(Base.metadata.create_all)
            return

        from cognee.context_global_variables import backend_access_control_enabled

        if self.engine.dialect.name == "postgresql" and backend_access_control_enabled():
            # Connect to maintenance db in order to create new database
            # Make sure to execute CREATE DATABASE outside of transaction block, and set AUTOCOMMIT isolation level
            connection = await self.maintenance_engine.connect()
            connection = await connection.execution_options(isolation_level="AUTOCOMMIT")
            await connection.execute(text(f'CREATE DATABASE "{self.engine.url.database}";'))

            # Clean up resources
            await connection.close()
        async with self.engine.begin() as connection:
            if len(Base.metadata.tables.keys()) > 0:
                await connection.run_sync(Base.metadata.create_all)

    async def delete_database(self):
        """

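On the sqlite branch above, the merge {**{"timeout": 30}, **final_connect_args} keeps 30 seconds as a built-in default while letting caller-supplied connect args win, because later entries in a dict literal override earlier ones:

final_connect_args = {"timeout": 60}  # e.g. parsed from DATABASE_CONNECT_ARGS
merged = {**{"timeout": 30}, **final_connect_args}
print(merged)  # {'timeout': 60}: the user-supplied value overrides the default
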
@@ -355,7 +355,6 @@ class ChromaDBAdapter(VectorDBInterface):
        limit: Optional[int] = 15,
        with_vector: bool = False,
        normalized: bool = True,
        include_payload: bool = False,  # TODO: Add support for this parameter when set to False
    ):
        """
        Search for items in a collection using either a text or a vector query.
@@ -442,7 +441,6 @@
        query_texts: List[str],
        limit: int = 5,
        with_vectors: bool = False,
        include_payload: bool = False,
    ):
        """
        Perform multiple searches in a single request for efficiency, returning results for each

@@ -29,9 +29,6 @@ class VectorConfig(BaseSettings):
    vector_db_key: str = ""
    vector_db_provider: str = "lancedb"
    vector_dataset_database_handler: str = "lancedb"
    vector_db_username: str = ""
    vector_db_password: str = ""
    vector_db_host: str = ""

    model_config = SettingsConfigDict(env_file=".env", extra="allow")

@@ -68,9 +65,6 @@
            "vector_db_key": self.vector_db_key,
            "vector_db_provider": self.vector_db_provider,
            "vector_dataset_database_handler": self.vector_dataset_database_handler,
            "vector_db_username": self.vector_db_username,
            "vector_db_password": self.vector_db_password,
            "vector_db_host": self.vector_db_host,
        }

@@ -1,5 +1,3 @@
from sqlalchemy import URL

from .supported_databases import supported_databases
from .embeddings import get_embedding_engine
from cognee.infrastructure.databases.graph.config import get_graph_context_config
@@ -7,6 +5,7 @@ from cognee.infrastructure.databases.graph.config import get_graph_context_confi
from functools import lru_cache


@lru_cache
def create_vector_engine(
    vector_db_provider: str,
    vector_db_url: str,
@@ -14,38 +13,6 @@ def create_vector_engine(
    vector_db_port: str = "",
    vector_db_key: str = "",
    vector_dataset_database_handler: str = "",
    vector_db_username: str = "",
    vector_db_password: str = "",
    vector_db_host: str = "",
):
    """
    Wrapper function to call create vector engine with caching.
    For a detailed description, see _create_vector_engine.
    """
    return _create_vector_engine(
        vector_db_provider,
        vector_db_url,
        vector_db_name,
        vector_db_port,
        vector_db_key,
        vector_dataset_database_handler,
        vector_db_username,
        vector_db_password,
        vector_db_host,
    )


@lru_cache
def _create_vector_engine(
    vector_db_provider: str,
    vector_db_url: str,
    vector_db_name: str,
    vector_db_port: str,
    vector_db_key: str,
    vector_dataset_database_handler: str,
    vector_db_username: str,
    vector_db_password: str,
    vector_db_host: str,
):
    """
    Create a vector database engine based on the specified provider.
@@ -86,43 +53,22 @@ def _create_vector_engine(
    )

    if vector_db_provider.lower() == "pgvector":
        from cognee.context_global_variables import backend_access_control_enabled
        from cognee.infrastructure.databases.relational import get_relational_config

        if backend_access_control_enabled():
            connection_string: str = (
                f"postgresql+asyncpg://{vector_db_username}:{vector_db_password}"
                f"@{vector_db_host}:{vector_db_port}/{vector_db_name}"
            )
        else:
            if (
                vector_db_port
                and vector_db_username
                and vector_db_password
                and vector_db_host
                and vector_db_name
            ):
                connection_string: str = (
                    f"postgresql+asyncpg://{vector_db_username}:{vector_db_password}"
                    f"@{vector_db_host}:{vector_db_port}/{vector_db_name}"
                )
            else:
                from cognee.infrastructure.databases.relational import get_relational_config
        # Get configuration for postgres database
        relational_config = get_relational_config()
        db_username = relational_config.db_username
        db_password = relational_config.db_password
        db_host = relational_config.db_host
        db_port = relational_config.db_port
        db_name = relational_config.db_name

                # Get configuration for postgres database
                relational_config = get_relational_config()
                db_username = relational_config.db_username
                db_password = relational_config.db_password
                db_host = relational_config.db_host
                db_port = relational_config.db_port
                db_name = relational_config.db_name
        if not (db_host and db_port and db_name and db_username and db_password):
            raise EnvironmentError("Missing requred pgvector credentials!")

                if not (db_host and db_port and db_name and db_username and db_password):
                    raise EnvironmentError("Missing required pgvector credentials!")
        connection_string: str = (
            f"postgresql+asyncpg://{db_username}:{db_password}"
            f"@{db_host}:{db_port}/{db_name}"
        )

                connection_string: str = (
                    f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
                )

    try:
        from .pgvector.PGVectorAdapter import PGVectorAdapter

@@ -14,8 +14,6 @@ from tenacity import (
)
import litellm
import os
from urllib.parse import urlparse
import httpx
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions import EmbeddingException
from cognee.infrastructure.llm.tokenizer.HuggingFace import (
@@ -81,26 +79,10 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
        enable_mocking = str(enable_mocking).lower()
        self.mock = enable_mocking in ("true", "1", "yes")

        # Validate provided custom embedding endpoint early to avoid long hangs later
        if self.endpoint:
            try:
                parsed = urlparse(self.endpoint)
            except Exception:
                parsed = None
            if not parsed or parsed.scheme not in ("http", "https") or not parsed.netloc:
                logger.error(
                    "Invalid EMBEDDING_ENDPOINT configured: '%s'. Expected a URL starting with http:// or https://",
                    str(self.endpoint),
                )
                raise EmbeddingException(
                    "Invalid EMBEDDING_ENDPOINT. Please set a valid URL (e.g., https://host:port) "
                    "via environment variable EMBEDDING_ENDPOINT."
                )

    @retry(
        stop=stop_after_delay(30),
        stop=stop_after_delay(128),
        wait=wait_exponential_jitter(2, 128),
        retry=retry_if_not_exception_type((litellm.exceptions.NotFoundError, EmbeddingException)),
        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
        before_sleep=before_sleep_log(logger, logging.DEBUG),
        reraise=True,
    )
@@ -129,16 +111,12 @@
            return [data["embedding"] for data in response["data"]]
        else:
            async with embedding_rate_limiter_context_manager():
                # Ensure each attempt does not hang indefinitely
                response = await asyncio.wait_for(
                    litellm.aembedding(
                        model=self.model,
                        input=text,
                        api_key=self.api_key,
                        api_base=self.endpoint,
                        api_version=self.api_version,
                    ),
                    timeout=30.0,
                response = await litellm.aembedding(
                    model=self.model,
                    input=text,
                    api_key=self.api_key,
                    api_base=self.endpoint,
                    api_version=self.api_version,
                )

            return [data["embedding"] for data in response.data]
@@ -176,27 +154,6 @@
            logger.error("Context window exceeded for embedding text: %s", str(error))
            raise error

        except asyncio.TimeoutError as e:
            # Per-attempt timeout – likely an unreachable endpoint
            logger.error(
                "Embedding endpoint timed out. EMBEDDING_ENDPOINT='%s'. "
                "Verify that the endpoint is reachable and correct.",
                str(self.endpoint),
            )
            raise EmbeddingException(
                "Embedding request timed out. Check EMBEDDING_ENDPOINT connectivity."
            ) from e

        except (httpx.ConnectError, httpx.ReadTimeout) as e:
            logger.error(
                "Failed to connect to embedding endpoint. EMBEDDING_ENDPOINT='%s'. "
                "Ensure the URL is correct and the server is running.",
                str(self.endpoint),
            )
            raise EmbeddingException(
                "Cannot connect to embedding endpoint. Check EMBEDDING_ENDPOINT."
            ) from e

        except (
            litellm.exceptions.BadRequestError,
            litellm.exceptions.NotFoundError,
@@ -205,15 +162,8 @@
            raise EmbeddingException(f"Failed to index data points using model {self.model}") from e

        except Exception as error:
            # Fall back to a clear, actionable message for connectivity/misconfiguration issues
            logger.error(
                "Error embedding text: %s. EMBEDDING_ENDPOINT='%s'.",
                str(error),
                str(self.endpoint),
            )
            raise EmbeddingException(
                "Embedding failed due to an unexpected error. Verify EMBEDDING_ENDPOINT and provider settings."
            ) from error
            logger.error("Error embedding text: %s", str(error))
            raise error

    def get_vector_size(self) -> int:
        """

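The asyncio.wait_for wrapper above caps each embedding attempt at 30 seconds so an unreachable endpoint fails fast instead of hanging; a minimal model of the behavior it relies on:

import asyncio

async def slow_call():
    await asyncio.sleep(10)  # stand-in for a request to an unreachable endpoint

async def main():
    try:
        await asyncio.wait_for(slow_call(), timeout=0.1)
    except asyncio.TimeoutError:
        print("attempt timed out; surfaced upstream as EmbeddingException")

asyncio.run(main())
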
@@ -231,7 +231,6 @@ class LanceDBAdapter(VectorDBInterface):
        limit: Optional[int] = 15,
        with_vector: bool = False,
        normalized: bool = True,
        include_payload: bool = False,
    ):
        if query_text is None and query_vector is None:
            raise MissingQueryParameterError()
@@ -248,27 +247,17 @@
        if limit <= 0:
            return []

        # Note: Exclude payload if not needed to optimize performance
        select_columns = (
            ["id", "vector", "payload", "_distance"]
            if include_payload
            else ["id", "vector", "_distance"]
        )
        result_values = (
            await collection.vector_search(query_vector)
            .select(select_columns)
            .limit(limit)
            .to_list()
        )
        result_values = await collection.vector_search(query_vector).limit(limit).to_list()

        if not result_values:
            return []

        normalized_values = normalize_distances(result_values)

        return [
            ScoredResult(
                id=parse_id(result["id"]),
                payload=result["payload"] if include_payload else None,
                payload=result["payload"],
                score=normalized_values[value_index],
            )
            for value_index, result in enumerate(result_values)
@@ -280,7 +269,6 @@
        query_texts: List[str],
        limit: Optional[int] = None,
        with_vectors: bool = False,
        include_payload: bool = False,
    ):
        query_vectors = await self.embedding_engine.embed_text(query_texts)

@@ -291,7 +279,6 @@
                query_vector=query_vector,
                limit=limit,
                with_vector=with_vectors,
                include_payload=include_payload,
            )
            for query_vector in query_vectors
        ]

Some files were not shown because too many files have changed in this diff.