Merge branch 'dev' into feature/cog-2746-time-graph-to-cognify

This commit is contained in:
hajdul88 2025-08-27 18:07:20 +02:00 committed by GitHub
commit 678173dad4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 8527 additions and 6353 deletions

View file

@ -153,31 +153,6 @@ jobs:
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_deduplication.py
run-deletion-test:
name: Deletion Test
runs-on: ubuntu-22.04
steps:
- name: Check out
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
- name: Run Deletion Tests
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Test needs OpenAI endpoint to handle multimedia
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_deletion.py
run-s3-bucket-test:
name: S3 Bucket Test
runs-on: ubuntu-22.04

View file

@ -1,116 +0,0 @@
name: Reusable Python Version Tests
on:
workflow_call:
inputs:
python-versions:
required: false
type: string
default: '["3.10.x", "3.11.x", "3.12.x"]'
secrets:
LLM_PROVIDER:
required: true
LLM_MODEL:
required: true
LLM_ENDPOINT:
required: true
LLM_API_KEY:
required: true
LLM_API_VERSION:
required: true
EMBEDDING_PROVIDER:
required: true
EMBEDDING_MODEL:
required: true
EMBEDDING_ENDPOINT:
required: true
EMBEDDING_API_KEY:
required: true
EMBEDDING_API_VERSION:
required: true
env:
RUNTIME__LOG_LEVEL: ERROR
ENV: 'dev'
jobs:
run-python-version-tests:
name: Python ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ubuntu-22.04, macos-13, macos-15]
fail-fast: false
steps:
- name: Check out
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ matrix.python-version }}
- name: Run unit tests
shell: bash
run: uv run pytest cognee/tests/unit/
env:
PYTHONUTF8: 1
LLM_PROVIDER: openai
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_PROVIDER: openai
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
- name: Run integration tests
if: ${{ !contains(matrix.os, 'windows') }}
shell: bash
run: uv run pytest cognee/tests/integration/
env:
PYTHONUTF8: 1
LLM_PROVIDER: openai
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_PROVIDER: openai
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
- name: Run default basic pipeline
shell: bash
env:
PYTHONUTF8: 1
LLM_PROVIDER: openai
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_PROVIDER: openai
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_library.py
- name: Build with uv
shell: bash
run: uv build
- name: Install Package
if: ${{ !contains(matrix.os, 'windows') }}
run: |
cd dist
pip install *.whl

View file

@ -0,0 +1,235 @@
name: Tests to run on different Operating Systems
permissions:
contents: read
on:
workflow_call:
inputs:
python-versions:
required: false
type: string
default: '["3.10.x", "3.11.x", "3.12.x"]'
secrets:
LLM_PROVIDER:
required: true
LLM_MODEL:
required: true
LLM_ENDPOINT:
required: true
LLM_API_KEY:
required: true
LLM_API_VERSION:
required: true
EMBEDDING_PROVIDER:
required: true
EMBEDDING_MODEL:
required: true
EMBEDDING_ENDPOINT:
required: true
EMBEDDING_API_KEY:
required: true
EMBEDDING_API_VERSION:
required: true
env:
RUNTIME__LOG_LEVEL: ERROR
ENV: 'dev'
jobs:
run-unit-tests:
name: Unit tests ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ubuntu-22.04, macos-13, macos-15, windows-latest]
fail-fast: false
steps:
- name: Check out
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ matrix.python-version }}
- name: Run unit tests
shell: bash
run: uv run pytest cognee/tests/unit/
env:
PYTHONUTF8: 1
LLM_PROVIDER: openai
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_PROVIDER: openai
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run-integration-tests:
name: Integration tests ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ matrix.python-version }}
- name: Run integration tests
shell: bash
run: uv run pytest cognee/tests/integration/
env:
PYTHONUTF8: 1
LLM_PROVIDER: openai
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_PROVIDER: openai
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run-library-test:
name: Library test ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ matrix.python-version }}
- name: Run default basic pipeline
shell: bash
env:
PYTHONUTF8: 1
LLM_PROVIDER: openai
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_PROVIDER: openai
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_library.py
run-build-test:
name: Build test ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ matrix.python-version }}
- name: Build with uv
shell: bash
run: uv build
- name: Install Package
if: ${{ !contains(matrix.os, 'windows-latest') }}
run: |
cd dist
pip install *.whl
run-soft-deletion-test:
name: Soft Delete test ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ matrix.python-version }}
- name: Run Soft Deletion Tests
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Test needs OpenAI endpoint to handle multimedia
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_delete_soft.py
run-hard-deletion-test:
name: Hard Delete test ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
fail-fast: false
steps:
- name: Check out
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ matrix.python-version }}
- name: Run Hard Deletion Test
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Test needs OpenAI endpoint to handle multimedia
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./cognee/tests/test_delete_hard.py

View file

@ -68,10 +68,10 @@ jobs:
uses: ./.github/workflows/notebooks_tests.yml
secrets: inherit
python-version-tests:
name: Python Version Tests
different-operating-systems-tests:
name: Operating System and Python Tests
needs: [basic-tests, e2e-tests, cli-tests]
uses: ./.github/workflows/python_version_tests.yml
uses: ./.github/workflows/test_different_operating_systems.yml
with:
python-versions: '["3.10.x", "3.11.x", "3.12.x"]'
secrets: inherit
@ -124,7 +124,7 @@ jobs:
cli-tests,
graph-db-tests,
notebook-tests,
python-version-tests,
different-operating-systems-tests,
vector-db-tests,
example-tests,
gemini-tests,
@ -144,7 +144,7 @@ jobs:
cli-tests,
graph-db-tests,
notebook-tests,
python-version-tests,
different-operating-systems-tests,
vector-db-tests,
example-tests,
db-examples-tests,
@ -165,7 +165,7 @@ jobs:
"${{ needs.cli-tests.result }}" == "success" &&
"${{ needs.graph-db-tests.result }}" == "success" &&
"${{ needs.notebook-tests.result }}" == "success" &&
"${{ needs.python-version-tests.result }}" == "success" &&
"${{ needs.different-operating-systems-tests.result }}" == "success" &&
"${{ needs.vector-db-tests.result }}" == "success" &&
"${{ needs.example-tests.result }}" == "success" &&
"${{ needs.db-examples-tests.result }}" == "success" &&

View file

@ -103,23 +103,159 @@ If you'd rather run cognee-mcp in a container, you have two options:
3. Run it:
```bash
# For HTTP transport (recommended for web deployments)
docker run --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main --transport http
docker run -e TRANSPORT_MODE=http --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main
# For SSE transport
docker run --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main --transport sse
docker run -e TRANSPORT_MODE=sse --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main
# For stdio transport (default)
docker run --env-file ./.env --rm -it cognee/cognee-mcp:main
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
```
2. **Pull from Docker Hub** (no build required):
```bash
# With HTTP transport (recommended for web deployments)
docker run --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main --transport http
docker run -e TRANSPORT_MODE=http --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main
# With SSE transport
docker run --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main --transport sse
docker run -e TRANSPORT_MODE=sse --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main
# With stdio transport (default)
docker run --env-file ./.env --rm -it cognee/cognee-mcp:main
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
```
### **Important: Docker vs Direct Usage**
**Docker uses environment variables**, not command line arguments:
- ✅ Docker: `-e TRANSPORT_MODE=http`
- ❌ Docker: `--transport http` (won't work)
**Direct Python usage** uses command line arguments:
- ✅ Direct: `python src/server.py --transport http`
- ❌ Direct: `-e TRANSPORT_MODE=http` (won't work)
## 💻 Basic Usage
## 🔗 MCP Client Configuration
After starting your Cognee MCP server with Docker, you need to configure your MCP client to connect to it.
### **SSE Transport Configuration** (Recommended)
**Start the server with SSE transport:**
```bash
docker run -e TRANSPORT_MODE=sse --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main
```
**Configure your MCP client:**
#### **Claude CLI (Easiest)**
```bash
claude mcp add cognee-sse -t sse http://localhost:8000/sse
```
**Verify the connection:**
```bash
claude mcp list
```
You should see your server connected:
```
Checking MCP server health...
cognee-sse: http://localhost:8000/sse (SSE) - ✓ Connected
```
#### **Manual Configuration**
**Claude (`~/.claude.json`)**
```json
{
"mcpServers": {
"cognee": {
"type": "sse",
"url": "http://localhost:8000/sse"
}
}
}
```
**Cursor (`~/.cursor/mcp.json`)**
```json
{
"mcpServers": {
"cognee-sse": {
"url": "http://localhost:8000/sse"
}
}
}
```
### **HTTP Transport Configuration** (Alternative)
**Start the server with HTTP transport:**
```bash
docker run -e TRANSPORT_MODE=http --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main
```
**Configure your MCP client:**
#### **Claude CLI (Easiest)**
```bash
claude mcp add cognee-http -t http http://localhost:8000/mcp
```
**Verify the connection:**
```bash
claude mcp list
```
You should see your server connected:
```
Checking MCP server health...
cognee-http: http://localhost:8000/mcp (HTTP) - ✓ Connected
```
#### **Manual Configuration**
**Claude (`~/.claude.json`)**
```json
{
"mcpServers": {
"cognee": {
"type": "http",
"url": "http://localhost:8000/mcp"
}
}
}
```
**Cursor (`~/.cursor/mcp.json`)**
```json
{
"mcpServers": {
"cognee-http": {
"url": "http://localhost:8000/mcp"
}
}
}
```
### **Dual Configuration Example**
You can configure both transports simultaneously for testing:
```json
{
"mcpServers": {
"cognee-sse": {
"type": "sse",
"url": "http://localhost:8000/sse"
},
"cognee-http": {
"type": "http",
"url": "http://localhost:8000/mcp"
}
}
}
```
**Note:** Only enable the server you're actually running to avoid connection errors.
## 💻 Basic Usage
The MCP server exposes its functionality through tools. Call them from any MCP client (Cursor, Claude Desktop, Cline, Roo and more).
@ -155,45 +291,6 @@ delete(data_id="data-uuid", dataset_id="dataset-uuid", mode="soft")
delete(data_id="data-uuid", dataset_id="dataset-uuid", mode="hard")
```
Remember – use the CODE search type to query your code graph. For huge repos, run codify on modules incrementally and cache results.
### IDE Example: Cursor
1. After you run the server as described in the [Quick Start](#-quickstart), create a run script for cognee. Here is a simple example:
```
#!/bin/bash
export ENV=local
export TOKENIZERS_PARALLELISM=false
export EMBEDDING_PROVIDER="fastembed"
export EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
export EMBEDDING_DIMENSIONS=384
export EMBEDDING_MAX_TOKENS=256
export LLM_API_KEY=your-OpenAI-API-key
uv --directory /{cognee_root_path}/cognee-mcp run cognee
```
Remember to replace *your-OpenAI-API-key* and *{cognee_root_path}* with correct values.
2. Install Cursor and navigate to Settings → MCP Tools → New MCP Server
3. Cursor will open *mcp.json* file in a new tab. Configure your cognee MCP server by copy-pasting the following:
```
{
"mcpServers": {
"cognee": {
"command": "sh",
"args": [
"/{path-to-your-script}/run-cognee.sh"
]
}
}
}
```
Remember to replace *{path-to-your-script}* with the correct value of the path of the script you created in the first step.
That's it! You can refresh the server from the toggle next to your new cognee server. Check the green dot and the available tools to verify your server is running.
Now you can open your Cursor Agent and start using cognee tools from it via prompting.
## Development and Debugging
@ -211,7 +308,7 @@ Open inspector with timeout passed:
To apply new changes while developing cognee you need to do:
1. `poetry lock` in cognee folder
1. Update dependencies in cognee folder if needed
2. `uv sync --dev --all-extras --reinstall`
3. `mcp dev src/server.py`

View file

@ -8,7 +8,7 @@ requires-python = ">=3.10"
dependencies = [
# For local cognee repo usage, uncomment the line below and set the absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
# "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee",
"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.2",
"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.3",
"fastmcp>=2.10.0,<3.0.0",
"mcp>=1.12.0,<2.0.0",
"uv>=0.6.3,<1.0.0",

View file

@ -21,16 +21,16 @@ from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.storage.utils import JSONEncoder
try:
from codingagents.coding_rule_associations import (
add_rule_associations,
get_existing_rules,
)
except ModuleNotFoundError:
from .codingagents.coding_rule_associations import (
add_rule_associations,
get_existing_rules,
)
# try:
# from codingagents.coding_rule_associations import (
# add_rule_associations,
# get_existing_rules,
# )
# except ModuleNotFoundError:
# from .codingagents.coding_rule_associations import (
# add_rule_associations,
# get_existing_rules,
# )
mcp = FastMCP("Cognee")
@ -121,7 +121,9 @@ async def cognee_add_developer_rules(
@mcp.tool()
async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
async def cognify(
data: str, graph_model_file: str = None, graph_model_name: str = None, custom_prompt: str = None
) -> list:
"""
Transform ingested data into a structured knowledge graph.
@ -169,6 +171,12 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
Required if graph_model_file is specified.
Default is None, which uses the default KnowledgeGraph class.
custom_prompt : str, optional
Custom prompt string to use for entity extraction and graph generation.
If provided, this prompt will be used instead of the default prompts for
knowledge graph extraction. The prompt should guide the LLM on how to
extract entities and relationships from the text content.
Returns
-------
list
@ -224,7 +232,10 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
"""
async def cognify_task(
data: str, graph_model_file: str = None, graph_model_name: str = None
data: str,
graph_model_file: str = None,
graph_model_name: str = None,
custom_prompt: str = None,
) -> str:
"""Build knowledge graph from the input text"""
# NOTE: MCP uses stdout to communicate, we must redirect all output
@ -239,7 +250,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
await cognee.add(data)
try:
await cognee.cognify(graph_model=graph_model)
await cognee.cognify(graph_model=graph_model, custom_prompt=custom_prompt)
logger.info("Cognify process finished.")
except Exception as e:
logger.error("Cognify process failed.")
@ -250,6 +261,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
data=data,
graph_model_file=graph_model_file,
graph_model_name=graph_model_name,
custom_prompt=custom_prompt,
)
)
@ -298,7 +310,7 @@ async def save_interaction(data: str) -> list:
logger.info("Save interaction process finished.")
logger.info("Generating associated rules from interaction data.")
await add_rule_associations(data=data, rules_nodeset_name="coding_agent_rules")
# await add_rule_associations(data=data, rules_nodeset_name="coding_agent_rules")
logger.info("Associated rules generated from interaction data.")
@ -560,8 +572,10 @@ async def get_developer_rules() -> list:
async def fetch_rules_from_cognee() -> str:
"""Collect all developer rules from Cognee"""
with redirect_stdout(sys.stderr):
developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules")
return developer_rules
note = "This is broken in 0.2.2"
return note
# developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules")
# return developer_rules
rules_text = await fetch_rules_from_cognee()

View file

@ -40,8 +40,14 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
user = await get_default_user()
detailed_extraction = True
# Multi-language support: allow passing supported_languages
supported_languages = None # defer to task defaults
tasks = [
Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction),
Task(
get_repo_file_dependencies,
detailed_extraction=detailed_extraction,
supported_languages=supported_languages,
),
# Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
Task(add_data_points, task_config={"batch_size": 30}),
]

View file

@ -43,6 +43,7 @@ async def cognify(
graph_db_config: dict = None,
run_in_background: bool = False,
incremental_loading: bool = True,
custom_prompt: Optional[str] = None,
temporal_cognify: bool = False,
):
"""
@ -106,6 +107,10 @@ async def cognify(
If False, waits for completion before returning.
Background mode recommended for large datasets (>100MB).
Use pipeline_run_id from return value to monitor progress.
custom_prompt: Optional custom prompt string to use for entity extraction and graph generation.
If provided, this prompt will be used instead of the default prompts for
knowledge graph extraction. The prompt should guide the LLM on how to
extract entities and relationships from the text content.
Returns:
Union[dict, list[PipelineRunInfo]]:
@ -185,7 +190,7 @@ async def cognify(
if temporal_cognify:
tasks = await get_temporal_tasks(user, chunker, chunk_size)
else:
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt)
# By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
@ -209,6 +214,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
chunker=TextChunker,
chunk_size: int = None,
ontology_file_path: Optional[str] = None,
custom_prompt: Optional[str] = None,
) -> list[Task]:
default_tasks = [
Task(classify_documents),
@ -222,6 +228,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
extract_graph_from_data,
graph_model=graph_model,
ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
custom_prompt=custom_prompt,
task_config={"batch_size": 10},
), # Generate knowledge graphs from the document chunks.
Task(

View file

@ -37,6 +37,9 @@ class CognifyPayloadDTO(InDTO):
datasets: Optional[List[str]] = Field(default=None)
dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
run_in_background: Optional[bool] = Field(default=False)
custom_prompt: Optional[str] = Field(
default=None, description="Custom prompt for entity extraction and graph generation"
)
def get_cognify_router() -> APIRouter:
@ -63,6 +66,7 @@ def get_cognify_router() -> APIRouter:
- **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
- **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
- **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
- **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
## Response
- **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@ -76,7 +80,8 @@ def get_cognify_router() -> APIRouter:
```json
{
"datasets": ["research_papers", "documentation"],
"run_in_background": false
"run_in_background": false,
"custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
}
```
@ -106,7 +111,10 @@ def get_cognify_router() -> APIRouter:
datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
cognify_run = await cognee_cognify(
datasets, user, run_in_background=payload.run_in_background
datasets,
user,
run_in_background=payload.run_in_background,
custom_prompt=payload.custom_prompt,
)
# If any cognify run errored return JSONResponse with proper error status code

View file

@ -49,6 +49,10 @@ DEFAULT_TOOLS = [
"type": "string",
"description": "Path to a custom ontology file",
},
"custom_prompt": {
"type": "string",
"description": "Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts.",
},
},
"required": ["text"],
},

View file

@ -88,11 +88,16 @@ async def handle_cognify(arguments: Dict[str, Any], user) -> str:
"""Handle cognify function call"""
text = arguments.get("text")
ontology_file_path = arguments.get("ontology_file_path")
custom_prompt = arguments.get("custom_prompt")
if text:
await add(data=text, user=user)
await cognify(user=user, ontology_file_path=ontology_file_path if ontology_file_path else None)
await cognify(
user=user,
ontology_file_path=ontology_file_path if ontology_file_path else None,
custom_prompt=custom_prompt,
)
return (
"Text successfully converted into knowledge graph."

View file

@ -87,12 +87,12 @@ After successful cognify processing, use `cognee search` to query the knowledge
async def run_cognify():
try:
# Import chunker classes here
from cognee.modules.chunking import TextChunker
from cognee.modules.chunking.TextChunker import TextChunker
chunker_class = TextChunker # Default
if args.chunker == "LangchainChunker":
try:
from cognee.modules.chunking import LangchainChunker
from cognee.modules.chunking.LangchainChunker import LangchainChunker
chunker_class = LangchainChunker
except ImportError:

View file

@ -5,19 +5,24 @@ from urllib.parse import urlparse
def get_data_file_path(file_path: str):
# Check if this is a file URI BEFORE normalizing (which corrupts URIs)
if file_path.startswith("file://"):
# Remove first occurrence of file:// prefix
pure_file_path = file_path.replace("file://", "", 1)
# Normalize the file URI for Windows - replace backslashes with forward slashes
normalized_file_uri = os.path.normpath(file_path)
normalized_file_uri = os.path.normpath(pure_file_path)
parsed_url = urlparse(normalized_file_uri)
# Convert URI path to file system path
# Convert path to proper file system path
if os.name == "nt": # Windows
# Handle Windows drive letters correctly
fs_path = parsed_url.path
if fs_path.startswith("/") and len(fs_path) > 1 and fs_path[2] == ":":
fs_path = fs_path[1:] # Remove leading slash for Windows drive paths
else: # Unix-like systems
fs_path = parsed_url.path
fs_path = normalized_file_uri
if (
(fs_path.startswith("/") or fs_path.startswith("\\"))
and len(fs_path) > 1
and fs_path[2] == ":"
):
fs_path = fs_path[1:]
else:
# Unix-like systems
fs_path = normalized_file_uri
# Now split the actual filesystem path
actual_fs_path = os.path.normpath(fs_path)

View file

@ -1,6 +1,7 @@
import io
import os.path
from typing import BinaryIO, TypedDict
from pathlib import Path
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash
@ -55,7 +56,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
file_type = guess_file_type(file)
file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
file_name = str(file_path).split("/")[-1].split(".")[0] if file_path else None
file_name = Path(file_path).stem if file_path else None
# Get file size
pos = file.tell() # remember current pointer

View file

@ -1,6 +1,5 @@
from typing import Type
from typing import Type, Optional, Coroutine
from pydantic import BaseModel
from typing import Coroutine
from cognee.infrastructure.llm import get_llm_config
@ -79,7 +78,10 @@ class LLMGateway:
@staticmethod
def extract_content_graph(
content: str, response_model: Type[BaseModel], mode: str = "simple"
content: str,
response_model: Type[BaseModel],
mode: str = "simple",
custom_prompt: Optional[str] = None,
) -> Coroutine:
llm_config = get_llm_config()
if llm_config.structured_output_framework.upper() == "BAML":
@ -87,13 +89,20 @@ class LLMGateway:
extract_content_graph,
)
return extract_content_graph(content=content, response_model=response_model, mode=mode)
return extract_content_graph(
content=content,
response_model=response_model,
mode=mode,
custom_prompt=custom_prompt,
)
else:
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
extract_content_graph,
)
return extract_content_graph(content=content, response_model=response_model)
return extract_content_graph(
content=content, response_model=response_model, custom_prompt=custom_prompt
)
@staticmethod
def extract_categories(content: str, response_model: Type[BaseModel]) -> Coroutine:

View file

@ -1,4 +1,4 @@
from typing import Type
from typing import Type, Optional
from pydantic import BaseModel
from cognee.infrastructure.llm.config import get_llm_config
from cognee.shared.logging_utils import get_logger, setup_logging
@ -6,7 +6,10 @@ from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.asyn
async def extract_content_graph(
content: str, response_model: Type[BaseModel], mode: str = "simple"
content: str,
response_model: Type[BaseModel],
mode: str = "simple",
custom_prompt: Optional[str] = None,
):
config = get_llm_config()
setup_logging()
@ -26,8 +29,16 @@ async def extract_content_graph(
# return graph
# else:
graph = await b.ExtractContentGraphGeneric(
content, mode=mode, baml_options={"client_registry": config.baml_registry}
)
if custom_prompt:
graph = await b.ExtractContentGraphGeneric(
content,
mode="custom",
custom_prompt_content=custom_prompt,
baml_options={"client_registry": config.baml_registry},
)
else:
graph = await b.ExtractContentGraphGeneric(
content, mode=mode, baml_options={"client_registry": config.baml_registry}
)
return graph

View file

@ -1,5 +1,5 @@
import os
from typing import Type
from typing import Type, Optional
from pydantic import BaseModel
from cognee.infrastructure.llm.LLMGateway import LLMGateway
@ -8,21 +8,25 @@ from cognee.infrastructure.llm.config import (
)
async def extract_content_graph(content: str, response_model: Type[BaseModel]):
llm_config = get_llm_config()
prompt_path = llm_config.graph_prompt_path
# Check if the prompt path is an absolute path or just a filename
if os.path.isabs(prompt_path):
# directory containing the file
base_directory = os.path.dirname(prompt_path)
# just the filename itself
prompt_path = os.path.basename(prompt_path)
async def extract_content_graph(
content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None
):
if custom_prompt:
system_prompt = custom_prompt
else:
base_directory = None
llm_config = get_llm_config()
prompt_path = llm_config.graph_prompt_path
system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
# Check if the prompt path is an absolute path or just a filename
if os.path.isabs(prompt_path):
# directory containing the file
base_directory = os.path.dirname(prompt_path)
# just the filename itself
prompt_path = os.path.basename(prompt_path)
else:
base_directory = None
system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
content_graph = await LLMGateway.acreate_structured_output(
content, system_prompt, response_model

View file

@ -266,48 +266,24 @@ async def run_tasks(
if incremental_loading:
data = await resolve_data_directories(data)
# TODO: Return to using async.gather for data items after Cognee release
# # Create async tasks per data item that will run the pipeline for the data item
# data_item_tasks = [
# asyncio.create_task(
# _run_tasks_data_item(
# data_item,
# dataset,
# tasks,
# pipeline_name,
# pipeline_id,
# pipeline_run_id,
# context,
# user,
# incremental_loading,
# )
# )
# for data_item in data
# ]
# results = await asyncio.gather(*data_item_tasks)
# # Remove skipped data items from results
# results = [result for result in results if result]
### TEMP sync data item handling
results = []
# Run the pipeline for each data_item sequentially, one after the other
for data_item in data:
result = await _run_tasks_data_item(
data_item,
dataset,
tasks,
pipeline_name,
pipeline_id,
pipeline_run_id,
context,
user,
incremental_loading,
# Create async tasks per data item that will run the pipeline for the data item
data_item_tasks = [
asyncio.create_task(
_run_tasks_data_item(
data_item,
dataset,
tasks,
pipeline_name,
pipeline_id,
pipeline_run_id,
context,
user,
incremental_loading,
)
)
# Skip items that returned a false-y value
if result:
results.append(result)
### END
for data_item in data
]
results = await asyncio.gather(*data_item_tasks)
# Remove skipped data items from results
results = [result for result in results if result]

View file

@ -1,37 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "initial_id",
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -36,6 +36,7 @@ class ClassDefinition(DataPoint):
class CodeFile(DataPoint):
name: str
file_path: str
language: Optional[str] = None # e.g., 'python', 'javascript', 'java', etc.
source_code: Optional[str] = None
part_of: Optional[Repository] = None
depends_on: Optional[List["ImportStatement"]] = []

View file

@ -15,14 +15,43 @@ from typing import Protocol
# Configure external library logging
def configure_external_library_logging():
    """Quiet LiteLLM (and related) loggers so they do not flood application output.

    Two things happen:

    1. ``LITELLM_LOG`` and ``LITELLM_SET_VERBOSE`` receive defaults through
       ``os.environ.setdefault``, so values already exported by the user win.
    2. If ``litellm`` can be imported, its verbose switches are turned off and
       a fixed list of logger names is raised to ``CRITICAL`` and disabled.

    When ``litellm`` is not installed only step 1 runs.
    """
    # Set environment variables to suppress LiteLLM logging
    os.environ.setdefault("LITELLM_LOG", "ERROR")
    os.environ.setdefault("LITELLM_SET_VERBOSE", "False")

    try:
        import litellm
    except ImportError:
        # LiteLLM not available, skip configuration
        return

    # Disable verbose logging
    litellm.set_verbose = False

    # Optional switches: only touch the ones this LiteLLM version exposes.
    for flag_name in ("suppress_debug_info", "turn_off_message"):
        if hasattr(litellm, flag_name):
            setattr(litellm, flag_name, True)

    # NOTE: ``litellm._turn_on_debug`` is deliberately left untouched. In
    # LiteLLM that name is a callable helper, so assigning ``False`` to it
    # would not disable anything and would make a later
    # ``litellm._turn_on_debug()`` call fail with TypeError.

    # Comprehensive logger suppression ("litellm" itself is part of the list,
    # so no separate setLevel call is needed for it).
    loggers_to_suppress = (
        "litellm",
        "litellm.litellm_core_utils.logging_worker",
        "litellm.litellm_core_utils",
        "litellm.proxy",
        "litellm.router",
        "openai._base_client",
        "LiteLLM",  # Capital case variant
        "LiteLLM.core",
        "LiteLLM.logging_worker",
        "litellm.logging_worker",
    )
    for logger_name in loggers_to_suppress:
        noisy_logger = logging.getLogger(logger_name)
        noisy_logger.setLevel(logging.CRITICAL)
        noisy_logger.disabled = True
@ -244,6 +273,75 @@ def setup_logging(log_level=None, name=None):
# Configure external library logging early to suppress verbose output
configure_external_library_logging()
# Add custom filter to suppress LiteLLM worker cancellation errors
class LiteLLMCancellationFilter(logging.Filter):
    """Drop records that come from LiteLLM or mention its worker cancellation.

    A record is rejected when its logger name contains ``litellm`` (any
    case), or when either the raw ``msg`` or the fully formatted message
    contains one of the marker substrings below. Everything else passes.
    """

    # Lower-case substrings that identify the shutdown noise.
    _NOISE_MARKERS = (
        "loggingworker cancelled",
        "logging_worker.py",
        "cancellederror",
        "litellm:error",
    )

    def _is_noise(self, text):
        """Return True when ``text`` contains any marker, ignoring case."""
        lowered = text.lower()
        return any(marker in lowered for marker in self._NOISE_MARKERS)

    def filter(self, record):
        # Anything emitted by a LiteLLM-named logger is suppressed outright.
        if "litellm" in getattr(record, "name", "").lower():
            return False

        # Unformatted message template (may be a non-string object).
        raw_message = getattr(record, "msg", None)
        if raw_message and self._is_noise(str(raw_message)):
            return False

        # Formatted message; formatting problems must never break logging,
        # so any failure here simply lets the record through.
        try:
            if self._is_noise(record.getMessage()):
                return False
        except Exception:
            pass
        return True


# Apply the filter to root logger and specific loggers
cancellation_filter = LiteLLMCancellationFilter()
for filtered_logger in (logging.getLogger(), logging.getLogger("litellm")):
    filtered_logger.addFilter(cancellation_filter)
# Add custom filter to suppress LiteLLM worker cancellation errors
class LiteLLMFilter(logging.Filter):
    """Reject records whose string ``msg`` mentions LiteLLM worker cancellation.

    Only the raw ``msg`` attribute is inspected, and only when it is a
    ``str``; records carrying any other kind of message always pass.
    """

    def filter(self, record):
        template = getattr(record, "msg", None)
        if not isinstance(template, str):
            return True

        lowered = template.lower()
        for phrase in (
            "loggingworker cancelled",
            "cancellederror",
            "logging_worker.py",
            "loggingerror",
        ):
            if phrase in lowered:
                return False
        return True


# Apply filter to root logger
litellm_filter = LiteLLMFilter()
logging.getLogger().addFilter(litellm_filter)
def exception_handler(logger, method_name, event_dict):
"""Custom processor to handle uncaught exceptions."""
# Check if there's an exc_info that needs to be processed

View file

@ -1,5 +1,5 @@
import asyncio
from typing import Type, List
from typing import Type, List, Optional
from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine
@ -71,6 +71,7 @@ async def extract_graph_from_data(
data_chunks: List[DocumentChunk],
graph_model: Type[BaseModel],
ontology_adapter: OntologyResolver = None,
custom_prompt: Optional[str] = None,
) -> List[DocumentChunk]:
"""
Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
@ -84,7 +85,10 @@ async def extract_graph_from_data(
raise InvalidGraphModelError(graph_model)
chunk_graphs = await asyncio.gather(
*[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
*[
LLMGateway.extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt)
for chunk in data_chunks
]
)
# Note: Filter edges with missing source or target nodes

View file

@ -180,6 +180,7 @@ async def get_local_script_dependencies(
name=file_path_relative_to_repo,
source_code=source_code,
file_path=script_path,
language="python",
)
return code_file_node
@ -188,6 +189,7 @@ async def get_local_script_dependencies(
name=file_path_relative_to_repo,
source_code=None,
file_path=script_path,
language="python",
)
async for part in extract_code_parts(source_code_tree.root_node, script_path=script_path):

View file

@ -10,50 +10,80 @@ from cognee.infrastructure.engine import DataPoint
from cognee.shared.CodeGraphEntities import CodeFile, Repository
async def get_source_code_files(repo_path):
async def get_source_code_files(repo_path, language_config: dict[str, list[str]] | None = None):
"""
Retrieve Python source code files from the specified repository path.
This function scans the given repository path for files that have the .py extension
while excluding test files and files within a virtual environment. It returns a list of
absolute paths to the source code files that are not empty.
Retrieve source code files from the specified repository path for multiple languages.
Parameters:
-----------
- repo_path: The file path to the repository to search for Python source files.
- repo_path: The file path to the repository to search for source files.
- language_config: dict mapping language names to file extensions, e.g.,
{'python': ['.py'], 'javascript': ['.js', '.jsx'], ...}
Returns:
--------
A list of absolute paths to .py files that contain source code, excluding empty
files, test files, and files from a virtual environment.
A list of (absolute_path, language) tuples for source code files.
"""
if not os.path.exists(repo_path):
return {}
py_files_paths = (
os.path.join(root, file)
for root, _, files in os.walk(repo_path)
for file in files
if (
file.endswith(".py")
and not file.startswith("test_")
and not file.endswith("_test")
and ".venv" not in file
)
)
def _get_language_from_extension(file, language_config):
for lang, exts in language_config.items():
for ext in exts:
if file.endswith(ext):
return lang
return None
# Default config if not provided
if language_config is None:
language_config = {
"python": [".py"],
"javascript": [".js", ".jsx"],
"typescript": [".ts", ".tsx"],
"java": [".java"],
"csharp": [".cs"],
"go": [".go"],
"rust": [".rs"],
"cpp": [".cpp", ".c", ".h", ".hpp"],
}
if not os.path.exists(repo_path):
return []
source_code_files = set()
for file_path in py_files_paths:
file_path = os.path.abspath(file_path)
for root, _, files in os.walk(repo_path):
for file in files:
lang = _get_language_from_extension(file, language_config)
if lang is None:
continue
# Exclude tests and common build/venv directories
excluded_dirs = {
".venv",
"venv",
"env",
".env",
"site-packages",
"node_modules",
"dist",
"build",
".git",
"tests",
"test",
}
root_parts = set(os.path.normpath(root).split(os.sep))
base_name, _ext = os.path.splitext(file)
if (
base_name.startswith("test_")
or base_name.endswith("_test") # catches Go's *_test.go and similar
or ".test." in file
or ".spec." in file
or (excluded_dirs & root_parts)
):
continue
file_path = os.path.abspath(os.path.join(root, file))
if os.path.getsize(file_path) == 0:
continue
source_code_files.add((file_path, lang))
if os.path.getsize(file_path) == 0:
continue
source_code_files.add(file_path)
return list(source_code_files)
return sorted(list(source_code_files))
def run_coroutine(coroutine_func, *args, **kwargs):
@ -85,22 +115,23 @@ def run_coroutine(coroutine_func, *args, **kwargs):
async def get_repo_file_dependencies(
repo_path: str, detailed_extraction: bool = False
repo_path: str, detailed_extraction: bool = False, supported_languages: list = None
) -> AsyncGenerator[DataPoint, None]:
"""
Generate a dependency graph for Python files in the given repository path.
Generate a dependency graph for source files (multi-language) in the given repository path.
Check the validity of the repository path and yield a repository object followed by the
dependencies of Python files within that repository. Raise a FileNotFoundError if the
dependencies of source files within that repository. Raise a FileNotFoundError if the
provided path does not exist. The extraction of detailed dependencies can be controlled
via the `detailed_extraction` argument.
via the `detailed_extraction` argument. Languages considered can be restricted via
the `supported_languages` argument.
Parameters:
-----------
- repo_path (str): The file path to the repository where Python files are located.
- detailed_extraction (bool): A flag indicating whether to perform a detailed
extraction of dependencies (default is False). (default False)
- repo_path (str): The file path to the repository to process.
- detailed_extraction (bool): Whether to perform a detailed extraction of code parts.
- supported_languages (list | None): Subset of languages to include; if None, use defaults.
"""
if isinstance(repo_path, list) and len(repo_path) == 1:
@ -109,7 +140,25 @@ async def get_repo_file_dependencies(
if not os.path.exists(repo_path):
raise FileNotFoundError(f"Repository path {repo_path} does not exist.")
source_code_files = await get_source_code_files(repo_path)
# Build language config from supported_languages
default_language_config = {
"python": [".py"],
"javascript": [".js", ".jsx"],
"typescript": [".ts", ".tsx"],
"java": [".java"],
"csharp": [".cs"],
"go": [".go"],
"rust": [".rs"],
"cpp": [".cpp", ".c", ".h", ".hpp"],
}
if supported_languages is not None:
language_config = {
k: v for k, v in default_language_config.items() if k in supported_languages
}
else:
language_config = default_language_config
source_code_files = await get_source_code_files(repo_path, language_config=language_config)
repo = Repository(
id=uuid5(NAMESPACE_OID, repo_path),
@ -128,19 +177,42 @@ async def get_repo_file_dependencies(
for chunk_number in range(number_of_chunks)
]
# Codegraph dependencies are not installed by default, so we import where we use them.
# Import dependency extractors for each language (Python for now, extend later)
from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies
import aiofiles
# TODO: Add other language extractors here
for start_range, end_range in chunk_ranges:
# with ProcessPoolExecutor(max_workers=12) as executor:
tasks = [
get_local_script_dependencies(repo_path, file_path, detailed_extraction)
for file_path in source_code_files[start_range : end_range + 1]
]
tasks = []
for file_path, lang in source_code_files[start_range : end_range + 1]:
# For now, only Python is supported; extend with other languages
if lang == "python":
tasks.append(
get_local_script_dependencies(repo_path, file_path, detailed_extraction)
)
else:
# Placeholder: create a minimal CodeFile for other languages
async def make_codefile_stub(file_path=file_path, lang=lang):
    """Build a minimal ``CodeFile`` holding only the file's raw text.

    ``file_path`` and ``lang`` are bound as default arguments so the
    coroutine keeps the values of the loop iteration that created it.
    """
    # Undecodable bytes are replaced rather than raising.
    async with aiofiles.open(file_path, "r", encoding="utf-8", errors="replace") as handle:
        contents = await handle.read()

    stub_fields = {
        "id": uuid5(NAMESPACE_OID, file_path),
        "name": os.path.relpath(file_path, repo_path),
        "file_path": file_path,
        "language": lang,
        "source_code": contents,
    }
    return CodeFile(**stub_fields)
tasks.append(make_codefile_stub())
results: list[CodeFile] = await asyncio.gather(*tasks)
for source_code_file in results:
source_code_file.part_of = repo
if getattr(
source_code_file, "language", None
) is None and source_code_file.file_path.endswith(".py"):
source_code_file.language = "python"
yield source_code_file

View file

@ -83,11 +83,16 @@ class TestCliIntegration:
# Note: This might fail due to dependencies, but we're testing the CLI structure
# The important thing is that it doesn't crash with argument parsing errors
assert (
"error" not in result.stderr.lower()
or "failed to add data" in result.stderr.lower()
# Allow litellm logging worker cancellation errors as they're expected during process shutdown
stderr_lower = result.stderr.lower()
has_error = "error" in stderr_lower
has_expected_failure = "failed to add data" in stderr_lower
has_litellm_cancellation = (
"loggingworker cancelled" in stderr_lower or "cancellederror" in stderr_lower
)
assert not has_error or has_expected_failure or has_litellm_cancellation
finally:
os.unlink(temp_file)

View file

@ -2,6 +2,7 @@ import os
import sys
import uuid
import pytest
import pathlib
from unittest.mock import patch
from cognee.modules.chunking.TextChunker import TextChunker
@ -24,8 +25,7 @@ GROUND_TRUTH = [
@pytest.mark.asyncio
async def test_PdfDocument(mock_engine):
test_file_path = os.path.join(
os.sep,
*(os.path.dirname(__file__).split(os.sep)[:-2]),
pathlib.Path(__file__).parent.parent.parent,
"test_data",
"artificial-intelligence.pdf",
)

View file

@ -2,6 +2,7 @@ import os
import sys
import uuid
import pytest
import pathlib
from unittest.mock import patch
from cognee.modules.chunking.TextChunker import TextChunker
@ -34,10 +35,7 @@ GROUND_TRUTH = {
@pytest.mark.asyncio
async def test_TextDocument(mock_engine, input_file, chunk_size):
test_file_path = os.path.join(
os.sep,
*(os.path.dirname(__file__).split(os.sep)[:-2]),
"test_data",
input_file,
pathlib.Path(__file__).parent.parent.parent, "test_data", input_file
)
document = TextDocument(
id=uuid.uuid4(),

View file

@ -2,6 +2,7 @@ import os
import sys
import uuid
import pytest
import pathlib
from unittest.mock import patch
from cognee.modules.chunking.TextChunker import TextChunker
@ -18,29 +19,25 @@ chunk_by_sentence_module = sys.modules.get("cognee.tasks.chunks.chunk_by_sentenc
async def test_UnstructuredDocument(mock_engine):
# Define file paths of test data
pptx_file_path = os.path.join(
os.sep,
*(os.path.dirname(__file__).split(os.sep)[:-2]),
pathlib.Path(__file__).parent.parent.parent,
"test_data",
"example.pptx",
)
docx_file_path = os.path.join(
os.sep,
*(os.path.dirname(__file__).split(os.sep)[:-2]),
pathlib.Path(__file__).parent.parent.parent,
"test_data",
"example.docx",
)
csv_file_path = os.path.join(
os.sep,
*(os.path.dirname(__file__).split(os.sep)[:-2]),
pathlib.Path(__file__).parent.parent.parent,
"test_data",
"example.csv",
)
xlsx_file_path = os.path.join(
os.sep,
*(os.path.dirname(__file__).split(os.sep)[:-2]),
pathlib.Path(__file__).parent.parent.parent,
"test_data",
"example.xlsx",
)

View file

@ -45,8 +45,6 @@ async def main():
Each of these car manufacturer contributes to Germany's reputation as a leader in the global automotive industry, showcasing a blend of innovation, performance, and design excellence.
"""
################### HARD DELETE
# Add documents and get dataset information
add_result = await cognee.add(
[
@ -80,41 +78,6 @@ async def main():
assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with hard delete."
################### SOFT DELETE
# Add documents and get dataset information
add_result = await cognee.add(
[
pdf_document,
txt_document,
text_document_as_literal,
unstructured_document,
audio_document,
image_document,
]
)
dataset_id = add_result.dataset_id
await cognee.cognify()
from cognee.infrastructure.databases.graph import get_graph_engine
graph_engine = await get_graph_engine()
nodes, edges = await graph_engine.get_graph_data()
assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."
# Get the data IDs from the dataset
dataset_data = await get_dataset_data(dataset_id)
assert len(dataset_data) > 0, "Dataset should contain data"
# Delete each document using its ID
for data_item in dataset_data:
await cognee.delete(data_item.id, dataset_id, mode="soft")
nodes, edges = await graph_engine.get_graph_data()
assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with soft delete."
if __name__ == "__main__":
import asyncio

View file

@ -0,0 +1,85 @@
import os
import shutil
import cognee
import pathlib
from cognee.shared.logging_utils import get_logger
from cognee.modules.data.methods import get_dataset_data
logger = get_logger()
async def main():
"""Check that soft-deleting every item of a mixed-media dataset empties the graph.

The test adds six inputs (PDF, text file, inline text, PPTX, MP3, PNG),
runs ``cognee.cognify()``, asserts the graph holds more than 10 nodes and
more than 10 edges, soft-deletes each dataset item by ID, and finally
asserts that no nodes or edges remain.
"""
# Prune stored data and system metadata before the run
# (presumably so earlier runs cannot affect the counts below - confirm).
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
# Fixture files are resolved relative to this module's directory.
pdf_document = os.path.join(
pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
)
txt_document = os.path.join(
pathlib.Path(__file__).parent, "test_data/Natural_language_processing_copy.txt"
)
audio_document = os.path.join(pathlib.Path(__file__).parent, "test_data/text_to_speech.mp3")
image_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.png")
unstructured_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.pptx")
# Inline text passed to cognee.add() as a literal instead of a file path.
text_document_as_literal = """
1. Audi
Audi is known for its modern designs and advanced technology. Founded in the early 1900s, the brand has earned a reputation for precision engineering and innovation. With features like the Quattro all-wheel-drive system, Audi offers a range of vehicles from stylish sedans to high-performance sports cars.
2. BMW
BMW, short for Bayerische Motoren Werke, is celebrated for its focus on performance and driving pleasure. The company's vehicles are designed to provide a dynamic and engaging driving experience, and their slogan, "The Ultimate Driving Machine," reflects that commitment. BMW produces a variety of cars that combine luxury with sporty performance.
3. Mercedes-Benz
Mercedes-Benz is synonymous with luxury and quality. With a history dating back to the early 20th century, the brand is known for its elegant designs, innovative safety features, and high-quality engineering. Mercedes-Benz manufactures not only luxury sedans but also SUVs, sports cars, and commercial vehicles, catering to a wide range of needs.
4. Porsche
Porsche is a name that stands for high-performance sports cars. Founded in 1931, the brand has become famous for models like the iconic Porsche 911. Porsche cars are celebrated for their speed, precision, and distinctive design, appealing to car enthusiasts who value both performance and style.
5. Volkswagen
Volkswagen, which means "people's car" in German, was established with the idea of making affordable and reliable vehicles accessible to everyone. Over the years, Volkswagen has produced several iconic models, such as the Beetle and the Golf. Today, it remains one of the largest car manufacturers in the world, offering a wide range of vehicles that balance practicality with quality.
Each of these car manufacturer contributes to Germany's reputation as a leader in the global automotive industry, showcasing a blend of innovation, performance, and design excellence.
"""
# Add documents and get dataset information
add_result = await cognee.add(
[
pdf_document,
txt_document,
text_document_as_literal,
unstructured_document,
audio_document,
image_document,
]
)
dataset_id = add_result.dataset_id
await cognee.cognify()
# Imported here rather than at module top; the reason is not visible in this file.
from cognee.infrastructure.databases.graph import get_graph_engine
graph_engine = await get_graph_engine()
# Precondition: cognify must have produced a non-trivial graph.
nodes, edges = await graph_engine.get_graph_data()
assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."
# Get the data IDs from the dataset
dataset_data = await get_dataset_data(dataset_id)
assert len(dataset_data) > 0, "Dataset should contain data"
# Delete each document using its ID
for data_item in dataset_data:
await cognee.delete(data_item.id, dataset_id, mode="soft")
# After soft-deleting every item the graph is expected to be completely empty.
nodes, edges = await graph_engine.get_graph_data()
assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with soft delete."
if __name__ == "__main__":
import asyncio
asyncio.run(main())

View file

@ -229,7 +229,7 @@ class TestCognifyCommand:
mock_asyncio_run.assert_called_once()
assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
from cognee.modules.chunking import TextChunker
from cognee.modules.chunking.TextChunker import TextChunker
mock_cognee.cognify.assert_awaited_once_with(
datasets=None,

View file

@ -253,7 +253,7 @@ class TestCognifyCommandEdgeCases:
mock_asyncio_run.assert_called_once()
assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
from cognee.modules.chunking import TextChunker
from cognee.modules.chunking.TextChunker import TextChunker
mock_cognee.cognify.assert_awaited_once_with(
datasets=None,
@ -286,7 +286,7 @@ class TestCognifyCommandEdgeCases:
mock_asyncio_run.assert_called_once()
assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
from cognee.modules.chunking import TextChunker
from cognee.modules.chunking.TextChunker import TextChunker
mock_cognee.cognify.assert_awaited_once_with(
datasets=None,
@ -306,9 +306,17 @@ class TestCognifyCommandEdgeCases:
def mock_import_func(name, fromlist=None, *args, **kwargs):
if name == "cognee":
return mock_cognee
elif name == "cognee.modules.chunking" and fromlist and "LangchainChunker" in fromlist:
elif (
name == "cognee.modules.chunking.LangchainChunker"
and fromlist
and "LangchainChunker" in fromlist
):
raise ImportError("LangchainChunker not available")
elif name == "cognee.modules.chunking":
elif (
name == "cognee.modules.chunking.TextChunker"
and fromlist
and "TextChunker" in fromlist
):
module = MagicMock()
module.TextChunker = MagicMock()
return module
@ -356,7 +364,7 @@ class TestCognifyCommandEdgeCases:
mock_asyncio_run.assert_called_once()
assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
from cognee.modules.chunking import TextChunker
from cognee.modules.chunking.TextChunker import TextChunker
mock_cognee.cognify.assert_awaited_once_with(
datasets=None,

File diff suppressed because it is too large Load diff

View file

@ -21,13 +21,15 @@
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-30T11:54:44.613431Z",
"start_time": "2025-06-30T11:54:44.606687Z"
}
},
"cell_type": "code",
"outputs": [],
"source": [
"import os\n",
"import pathlib\n",
@ -44,9 +46,7 @@
" \"../\",\n",
" \"examples/data/multimedia/example.png\",\n",
")"
],
"outputs": [],
"execution_count": 1
]
},
{
"cell_type": "markdown",
@ -57,12 +57,14 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-30T11:54:46.739157Z",
"start_time": "2025-06-30T11:54:46.734808Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
@ -93,9 +95,7 @@
"# os.environ[\"DB_PORT\"]=\"5432\"\n",
"# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
"# os.environ[\"DB_PASSWORD\"]=\"cognee\""
],
"outputs": [],
"execution_count": 2
]
},
{
"cell_type": "markdown",
@ -106,12 +106,213 @@
},
{
"cell_type": "code",
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-30T11:55:01.959946Z",
"start_time": "2025-06-30T11:54:50.569659Z"
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\u001b[2m2025-08-27T14:33:41.256195\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDeleted old log file: /Users/daulet/Desktop/dev/cognee-claude/logs/2025-08-27_14-00-27.log\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
"/Users/daulet/Desktop/dev/cognee-claude/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"\n",
"\u001b[2m2025-08-27T14:33:42.133224\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mLogging initialized \u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m \u001b[36mcognee_version\u001b[0m=\u001b[35m0.2.4-local\u001b[0m \u001b[36mdatabase_path\u001b[0m=\u001b[35m/Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases\u001b[0m \u001b[36mgraph_database_name\u001b[0m=\u001b[35m\u001b[0m \u001b[36mos_info\u001b[0m=\u001b[35m'Darwin 24.5.0 (Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:43 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T8132)'\u001b[0m \u001b[36mpython_version\u001b[0m=\u001b[35m3.12.7\u001b[0m \u001b[36mrelational_config\u001b[0m=\u001b[35mcognee_db\u001b[0m \u001b[36mstructlog_version\u001b[0m=\u001b[35m25.4.0\u001b[0m \u001b[36mvector_config\u001b[0m=\u001b[35mlancedb\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:42.133667\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDatabase storage: /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:43.785214\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDeleted Kuzu database files at /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases/cognee_graph_kuzu\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:44.215920\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDatabase deleted successfully.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[1mLangfuse client is disabled since no public_key was provided as a parameter or environment variable 'LANGFUSE_PUBLIC_KEY'. See our docs: https://langfuse.com/docs/sdk/python/low-level-sdk#initialize-client\u001b[0m\n",
"\u001b[92m15:33:44 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"User 37ea34fa-cae7-4bea-8cb3-1ba234688771 has registered.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\u001b[1mEmbeddingRateLimiter initialized: enabled=False, requests_limit=60, interval_seconds=60\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:50.440270\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `017311b3-90e5-53ce-9974-00c4d9551248`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:50.690756\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:50.996600\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.287352\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: pypdf_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.287759\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: text_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.288078\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: image_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.288341\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: audio_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.288576\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: unstructured_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\u001b[92m15:33:52 - LiteLLM:INFO\u001b[0m: utils.py:1274 - Wrapper: Completed Call, calling success_handler\n",
"\n",
"\u001b[1mWrapper: Completed Call, calling success_handler\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:52.455447\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:52.599686\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:52.806593\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `017311b3-90e5-53ce-9974-00c4d9551248`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:53.075106\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `017311b3-90e5-53ce-9974-00c4d9551248`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:53.209912\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:53.355890\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:33:53 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\u001b[92m15:33:57 - LiteLLM:INFO\u001b[0m: utils.py:1274 - Wrapper: Completed Call, calling success_handler\n",
"\n",
"\u001b[1mWrapper: Completed Call, calling success_handler\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.407561\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.560808\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.713507\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `017311b3-90e5-53ce-9974-00c4d9551248`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.897060\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOntology file 'None' not found. No owl ontology will be attached to the graph.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.938027\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `9bd0d908-8e9e-5780-b4c2-09fc8d471f1b`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:58.093101\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:58.255165\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:58.428623\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:58.588682\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:33:58 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.706892\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.707703\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'programmer' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.708083\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'object' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.708440\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'light bulb' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.708802\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.709129\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'hardware problem' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.709475\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'how many programmers does it take to change a light bulb? none, thats a hardware problem.' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:24.553989\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:34:24 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:32.883579\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:35.680233\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:35.825933\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:35.975352\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.126720\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.275404\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.424984\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.576258\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `9bd0d908-8e9e-5780-b4c2-09fc8d471f1b`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.754472\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `9bd0d908-8e9e-5780-b4c2-09fc8d471f1b`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.912219\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:37.053036\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:37.220157\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:37.388094\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:34:37 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.010321\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.012394\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'programmers' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.012794\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'object' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.013111\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'light bulb' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.013378\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.013598\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'hardware problem' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.013914\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'joke' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.014215\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'programmer joke' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:02.040520\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:35:02 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:11.589828\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:14.446614\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:14.622281\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:14.820192\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:15.004173\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:15.518803\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:15.756519\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:15.978364\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `9bd0d908-8e9e-5780-b4c2-09fc8d471f1b`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{UUID('a08926db-6319-5cd9-adc9-2cf9dfbc75e0'): PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('69b78d3d-4d27-5d9f-918f-57e77b3cb10a'), dataset_id=UUID('a08926db-6319-5cd9-adc9-2cf9dfbc75e0'), dataset_name='main_dataset', payload=None, data_ingestion_info=[{'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('69b78d3d-4d27-5d9f-918f-57e77b3cb10a'), dataset_id=UUID('a08926db-6319-5cd9-adc9-2cf9dfbc75e0'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('17b5c469-a8ce-5347-bea5-ab3dba767d13')}, {'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('69b78d3d-4d27-5d9f-918f-57e77b3cb10a'), dataset_id=UUID('a08926db-6319-5cd9-adc9-2cf9dfbc75e0'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('5b1e3c7e-d837-5704-a3b3-53abdda3a84f')}])}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
},
],
"source": [
"import cognee\n",
"\n",
@ -124,75 +325,7 @@
"\n",
"# Create knowledge graph with cognee\n",
"await cognee.cognify()"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/vasilije/cognee/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"ename": "OperationalError",
"evalue": "(sqlite3.OperationalError) database is locked\n[SQL: \nCREATE TABLE data (\n\tid UUID NOT NULL, \n\tname VARCHAR, \n\textension VARCHAR, \n\tmime_type VARCHAR, \n\traw_data_location VARCHAR, \n\towner_id UUID, \n\tcontent_hash VARCHAR, \n\texternal_metadata JSON, \n\tnode_set JSON, \n\ttoken_count INTEGER, \n\tcreated_at DATETIME, \n\tupdated_at DATETIME, \n\tPRIMARY KEY (id)\n)\n\n]\n(Background on this error at: https://sqlalche.me/e/20/e3q8)",
"output_type": "error",
"traceback": [
"\u001B[31m---------------------------------------------------------------------------\u001B[39m",
"\u001B[31mOperationalError\u001B[39m Traceback (most recent call last)",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1964\u001B[39m, in \u001B[36mConnection._exec_single_context\u001B[39m\u001B[34m(self, dialect, context, statement, parameters)\u001B[39m\n\u001B[32m 1963\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m evt_handled:\n\u001B[32m-> \u001B[39m\u001B[32m1964\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mdialect\u001B[49m\u001B[43m.\u001B[49m\u001B[43mdo_execute\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 1965\u001B[39m \u001B[43m \u001B[49m\u001B[43mcursor\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mstr_statement\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43meffective_parameters\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcontext\u001B[49m\n\u001B[32m 1966\u001B[39m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 1968\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m._has_events \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m.engine._has_events:\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/default.py:942\u001B[39m, in \u001B[36mDefaultDialect.do_execute\u001B[39m\u001B[34m(self, cursor, statement, parameters, context)\u001B[39m\n\u001B[32m 941\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34mdo_execute\u001B[39m(\u001B[38;5;28mself\u001B[39m, cursor, statement, parameters, context=\u001B[38;5;28;01mNone\u001B[39;00m):\n\u001B[32m--> \u001B[39m\u001B[32m942\u001B[39m \u001B[43mcursor\u001B[49m\u001B[43m.\u001B[49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[43mstatement\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparameters\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py:172\u001B[39m, in \u001B[36mAsyncAdapt_aiosqlite_cursor.execute\u001B[39m\u001B[34m(self, operation, parameters)\u001B[39m\n\u001B[32m 171\u001B[39m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n\u001B[32m--> \u001B[39m\u001B[32m172\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_adapt_connection\u001B[49m\u001B[43m.\u001B[49m\u001B[43m_handle_exception\u001B[49m\u001B[43m(\u001B[49m\u001B[43merror\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py:323\u001B[39m, in \u001B[36mAsyncAdapt_aiosqlite_connection._handle_exception\u001B[39m\u001B[34m(self, error)\u001B[39m\n\u001B[32m 322\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m--> \u001B[39m\u001B[32m323\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m error\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py:154\u001B[39m, in \u001B[36mAsyncAdapt_aiosqlite_cursor.execute\u001B[39m\u001B[34m(self, operation, parameters)\u001B[39m\n\u001B[32m 153\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m--> \u001B[39m\u001B[32m154\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mawait_\u001B[49m\u001B[43m(\u001B[49m\u001B[43m_cursor\u001B[49m\u001B[43m.\u001B[49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[43moperation\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparameters\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 156\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m _cursor.description:\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py:132\u001B[39m, in \u001B[36mawait_only\u001B[39m\u001B[34m(awaitable)\u001B[39m\n\u001B[32m 128\u001B[39m \u001B[38;5;66;03m# returns the control to the driver greenlet passing it\u001B[39;00m\n\u001B[32m 129\u001B[39m \u001B[38;5;66;03m# a coroutine to run. Once the awaitable is done, the driver greenlet\u001B[39;00m\n\u001B[32m 130\u001B[39m \u001B[38;5;66;03m# switches back to this greenlet with the result of awaitable that is\u001B[39;00m\n\u001B[32m 131\u001B[39m \u001B[38;5;66;03m# then returned to the caller (or raised as error)\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m132\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mcurrent\u001B[49m\u001B[43m.\u001B[49m\u001B[43mparent\u001B[49m\u001B[43m.\u001B[49m\u001B[43mswitch\u001B[49m\u001B[43m(\u001B[49m\u001B[43mawaitable\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py:196\u001B[39m, in \u001B[36mgreenlet_spawn\u001B[39m\u001B[34m(fn, _require_await, *args, **kwargs)\u001B[39m\n\u001B[32m 193\u001B[39m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[32m 194\u001B[39m \u001B[38;5;66;03m# wait for a coroutine from await_only and then return its\u001B[39;00m\n\u001B[32m 195\u001B[39m \u001B[38;5;66;03m# result back to it.\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m196\u001B[39m value = \u001B[38;5;28;01mawait\u001B[39;00m result\n\u001B[32m 197\u001B[39m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m:\n\u001B[32m 198\u001B[39m \u001B[38;5;66;03m# this allows an exception to be raised within\u001B[39;00m\n\u001B[32m 199\u001B[39m \u001B[38;5;66;03m# the moderated greenlet so that it can continue\u001B[39;00m\n\u001B[32m 200\u001B[39m \u001B[38;5;66;03m# its expected flow.\u001B[39;00m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/aiosqlite/cursor.py:48\u001B[39m, in \u001B[36mCursor.execute\u001B[39m\u001B[34m(self, sql, parameters)\u001B[39m\n\u001B[32m 47\u001B[39m parameters = []\n\u001B[32m---> \u001B[39m\u001B[32m48\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m \u001B[38;5;28mself\u001B[39m._execute(\u001B[38;5;28mself\u001B[39m._cursor.execute, sql, parameters)\n\u001B[32m 49\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/aiosqlite/cursor.py:40\u001B[39m, in \u001B[36mCursor._execute\u001B[39m\u001B[34m(self, fn, *args, **kwargs)\u001B[39m\n\u001B[32m 39\u001B[39m \u001B[38;5;250m\u001B[39m\u001B[33;03m\"\"\"Execute the given function on the shared connection's thread.\"\"\"\u001B[39;00m\n\u001B[32m---> \u001B[39m\u001B[32m40\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m \u001B[38;5;28mself\u001B[39m._conn._execute(fn, *args, **kwargs)\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/aiosqlite/core.py:132\u001B[39m, in \u001B[36mConnection._execute\u001B[39m\u001B[34m(self, fn, *args, **kwargs)\u001B[39m\n\u001B[32m 130\u001B[39m \u001B[38;5;28mself\u001B[39m._tx.put_nowait((future, function))\n\u001B[32m--> \u001B[39m\u001B[32m132\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m future\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/aiosqlite/core.py:115\u001B[39m, in \u001B[36mConnection.run\u001B[39m\u001B[34m(self)\u001B[39m\n\u001B[32m 114\u001B[39m LOG.debug(\u001B[33m\"\u001B[39m\u001B[33mexecuting \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[33m\"\u001B[39m, function)\n\u001B[32m--> \u001B[39m\u001B[32m115\u001B[39m result = \u001B[43mfunction\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 116\u001B[39m LOG.debug(\u001B[33m\"\u001B[39m\u001B[33moperation \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[33m completed\u001B[39m\u001B[33m\"\u001B[39m, function)\n",
"\u001B[31mOperationalError\u001B[39m: database is locked",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001B[31mOperationalError\u001B[39m Traceback (most recent call last)",
"\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[3]\u001B[39m\u001B[32m, line 8\u001B[39m\n\u001B[32m 5\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m cognee.prune.prune_system(metadata=\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[32m 7\u001B[39m \u001B[38;5;66;03m# Add multimedia files and make them available for cognify\u001B[39;00m\n\u001B[32m----> \u001B[39m\u001B[32m8\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m cognee.add([mp3_file_path, png_file_path])\n\u001B[32m 10\u001B[39m \u001B[38;5;66;03m# Create knowledge graph with cognee\u001B[39;00m\n\u001B[32m 11\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m cognee.cognify()\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/Projects/release_test/cognee/cognee/api/v1/add/add.py:26\u001B[39m, in \u001B[36madd\u001B[39m\u001B[34m(data, dataset_name, user, node_set, vector_db_config, graph_db_config, dataset_id)\u001B[39m\n\u001B[32m 19\u001B[39m tasks = [\n\u001B[32m 20\u001B[39m Task(resolve_data_directories),\n\u001B[32m 21\u001B[39m Task(ingest_data, dataset_name, user, node_set, dataset_id),\n\u001B[32m 22\u001B[39m ]\n\u001B[32m 24\u001B[39m pipeline_run_info = \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[32m---> \u001B[39m\u001B[32m26\u001B[39m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mfor\u001B[39;00m run_info \u001B[38;5;129;01min\u001B[39;00m cognee_pipeline(\n\u001B[32m 27\u001B[39m tasks=tasks,\n\u001B[32m 28\u001B[39m datasets=dataset_id \u001B[38;5;28;01mif\u001B[39;00m dataset_id \u001B[38;5;28;01melse\u001B[39;00m dataset_name,\n\u001B[32m 29\u001B[39m data=data,\n\u001B[32m 30\u001B[39m user=user,\n\u001B[32m 31\u001B[39m pipeline_name=\u001B[33m\"\u001B[39m\u001B[33madd_pipeline\u001B[39m\u001B[33m\"\u001B[39m,\n\u001B[32m 32\u001B[39m vector_db_config=vector_db_config,\n\u001B[32m 33\u001B[39m graph_db_config=graph_db_config,\n\u001B[32m 34\u001B[39m ):\n\u001B[32m 35\u001B[39m pipeline_run_info = run_info\n\u001B[32m 37\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m pipeline_run_info\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/Projects/release_test/cognee/cognee/modules/pipelines/operations/pipeline.py:63\u001B[39m, in \u001B[36mcognee_pipeline\u001B[39m\u001B[34m(tasks, data, datasets, user, pipeline_name, vector_db_config, graph_db_config)\u001B[39m\n\u001B[32m 60\u001B[39m context_graph_db_config.set(graph_db_config)\n\u001B[32m 62\u001B[39m \u001B[38;5;66;03m# Create tables for databases\u001B[39;00m\n\u001B[32m---> \u001B[39m\u001B[32m63\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m create_relational_db_and_tables()\n\u001B[32m 64\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m create_pgvector_db_and_tables()\n\u001B[32m 66\u001B[39m \u001B[38;5;66;03m# Initialize first_run attribute if it doesn't exist\u001B[39;00m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/Projects/release_test/cognee/cognee/infrastructure/databases/relational/create_db_and_tables.py:13\u001B[39m, in \u001B[36mcreate_db_and_tables\u001B[39m\u001B[34m()\u001B[39m\n\u001B[32m 5\u001B[39m \u001B[38;5;250m\u001B[39m\u001B[33;03m\"\"\"\u001B[39;00m\n\u001B[32m 6\u001B[39m \u001B[33;03mCreate a database and its tables.\u001B[39;00m\n\u001B[32m 7\u001B[39m \n\u001B[32m 8\u001B[39m \u001B[33;03mThis asynchronous function retrieves the relational engine and calls its method to\u001B[39;00m\n\u001B[32m 9\u001B[39m \u001B[33;03mcreate a database.\u001B[39;00m\n\u001B[32m 10\u001B[39m \u001B[33;03m\"\"\"\u001B[39;00m\n\u001B[32m 11\u001B[39m relational_engine = get_relational_engine()\n\u001B[32m---> \u001B[39m\u001B[32m13\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m relational_engine.create_database()\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/.pyenv/versions/3.11.0/lib/python3.11/contextlib.py:222\u001B[39m, in \u001B[36m_AsyncGeneratorContextManager.__aexit__\u001B[39m\u001B[34m(self, typ, value, traceback)\u001B[39m\n\u001B[32m 220\u001B[39m value = typ()\n\u001B[32m 221\u001B[39m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[32m--> \u001B[39m\u001B[32m222\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m \u001B[38;5;28mself\u001B[39m.gen.athrow(typ, value, traceback)\n\u001B[32m 223\u001B[39m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mStopAsyncIteration\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m exc:\n\u001B[32m 224\u001B[39m \u001B[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001B[39;00m\n\u001B[32m 225\u001B[39m \u001B[38;5;66;03m# was passed to throw(). This prevents a StopIteration\u001B[39;00m\n\u001B[32m 226\u001B[39m \u001B[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001B[39;00m\n\u001B[32m 227\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m exc \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m value\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/ext/asyncio/engine.py:1066\u001B[39m, in \u001B[36mAsyncEngine.begin\u001B[39m\u001B[34m(self)\u001B[39m\n\u001B[32m 1064\u001B[39m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mwith\u001B[39;00m conn:\n\u001B[32m 1065\u001B[39m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mwith\u001B[39;00m conn.begin():\n\u001B[32m-> \u001B[39m\u001B[32m1066\u001B[39m \u001B[38;5;28;01myield\u001B[39;00m conn\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/Projects/release_test/cognee/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py:445\u001B[39m, in \u001B[36mSQLAlchemyAdapter.create_database\u001B[39m\u001B[34m(self)\u001B[39m\n\u001B[32m 443\u001B[39m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mself\u001B[39m.engine.begin() \u001B[38;5;28;01mas\u001B[39;00m connection:\n\u001B[32m 444\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(Base.metadata.tables.keys()) > \u001B[32m0\u001B[39m:\n\u001B[32m--> \u001B[39m\u001B[32m445\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m connection.run_sync(Base.metadata.create_all)\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/ext/asyncio/engine.py:887\u001B[39m, in \u001B[36mAsyncConnection.run_sync\u001B[39m\u001B[34m(self, fn, *arg, **kw)\u001B[39m\n\u001B[32m 819\u001B[39m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34mrun_sync\u001B[39m(\n\u001B[32m 820\u001B[39m \u001B[38;5;28mself\u001B[39m,\n\u001B[32m 821\u001B[39m fn: Callable[Concatenate[Connection, _P], _T],\n\u001B[32m 822\u001B[39m *arg: _P.args,\n\u001B[32m 823\u001B[39m **kw: _P.kwargs,\n\u001B[32m 824\u001B[39m ) -> _T:\n\u001B[32m 825\u001B[39m \u001B[38;5;250m \u001B[39m\u001B[33;03m'''Invoke the given synchronous (i.e. not async) callable,\u001B[39;00m\n\u001B[32m 826\u001B[39m \u001B[33;03m passing a synchronous-style :class:`_engine.Connection` as the first\u001B[39;00m\n\u001B[32m 827\u001B[39m \u001B[33;03m argument.\u001B[39;00m\n\u001B[32m (...)\u001B[39m\u001B[32m 884\u001B[39m \n\u001B[32m 885\u001B[39m \u001B[33;03m '''\u001B[39;00m \u001B[38;5;66;03m# noqa: E501\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m887\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m greenlet_spawn(\n\u001B[32m 888\u001B[39m fn, \u001B[38;5;28mself\u001B[39m._proxied, *arg, _require_await=\u001B[38;5;28;01mFalse\u001B[39;00m, **kw\n\u001B[32m 889\u001B[39m )\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py:201\u001B[39m, in \u001B[36mgreenlet_spawn\u001B[39m\u001B[34m(fn, _require_await, *args, **kwargs)\u001B[39m\n\u001B[32m 196\u001B[39m value = \u001B[38;5;28;01mawait\u001B[39;00m result\n\u001B[32m 197\u001B[39m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m:\n\u001B[32m 198\u001B[39m \u001B[38;5;66;03m# this allows an exception to be raised within\u001B[39;00m\n\u001B[32m 199\u001B[39m \u001B[38;5;66;03m# the moderated greenlet so that it can continue\u001B[39;00m\n\u001B[32m 200\u001B[39m \u001B[38;5;66;03m# its expected flow.\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m201\u001B[39m result = \u001B[43mcontext\u001B[49m\u001B[43m.\u001B[49m\u001B[43mthrow\u001B[49m\u001B[43m(\u001B[49m\u001B[43m*\u001B[49m\u001B[43msys\u001B[49m\u001B[43m.\u001B[49m\u001B[43mexc_info\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 202\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m 203\u001B[39m result = context.switch(value)\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/sql/schema.py:5907\u001B[39m, in \u001B[36mMetaData.create_all\u001B[39m\u001B[34m(self, bind, tables, checkfirst)\u001B[39m\n\u001B[32m 5883\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34mcreate_all\u001B[39m(\n\u001B[32m 5884\u001B[39m \u001B[38;5;28mself\u001B[39m,\n\u001B[32m 5885\u001B[39m bind: _CreateDropBind,\n\u001B[32m 5886\u001B[39m tables: Optional[_typing_Sequence[Table]] = \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[32m 5887\u001B[39m checkfirst: \u001B[38;5;28mbool\u001B[39m = \u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[32m 5888\u001B[39m ) -> \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[32m 5889\u001B[39m \u001B[38;5;250m \u001B[39m\u001B[33;03m\"\"\"Create all tables stored in this metadata.\u001B[39;00m\n\u001B[32m 5890\u001B[39m \n\u001B[32m 5891\u001B[39m \u001B[33;03m Conditional by default, will not attempt to recreate tables already\u001B[39;00m\n\u001B[32m (...)\u001B[39m\u001B[32m 5905\u001B[39m \n\u001B[32m 5906\u001B[39m \u001B[33;03m \"\"\"\u001B[39;00m\n\u001B[32m-> \u001B[39m\u001B[32m5907\u001B[39m \u001B[43mbind\u001B[49m\u001B[43m.\u001B[49m\u001B[43m_run_ddl_visitor\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 5908\u001B[39m \u001B[43m \u001B[49m\u001B[43mddl\u001B[49m\u001B[43m.\u001B[49m\u001B[43mSchemaGenerator\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcheckfirst\u001B[49m\u001B[43m=\u001B[49m\u001B[43mcheckfirst\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtables\u001B[49m\u001B[43m=\u001B[49m\u001B[43mtables\u001B[49m\n\u001B[32m 5909\u001B[39m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2456\u001B[39m, in \u001B[36mConnection._run_ddl_visitor\u001B[39m\u001B[34m(self, visitorcallable, element, **kwargs)\u001B[39m\n\u001B[32m 2444\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34m_run_ddl_visitor\u001B[39m(\n\u001B[32m 2445\u001B[39m \u001B[38;5;28mself\u001B[39m,\n\u001B[32m 2446\u001B[39m visitorcallable: Type[Union[SchemaGenerator, SchemaDropper]],\n\u001B[32m 2447\u001B[39m element: SchemaItem,\n\u001B[32m 2448\u001B[39m **kwargs: Any,\n\u001B[32m 2449\u001B[39m ) -> \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[32m 2450\u001B[39m \u001B[38;5;250m \u001B[39m\u001B[33;03m\"\"\"run a DDL visitor.\u001B[39;00m\n\u001B[32m 2451\u001B[39m \n\u001B[32m 2452\u001B[39m \u001B[33;03m This method is only here so that the MockConnection can change the\u001B[39;00m\n\u001B[32m 2453\u001B[39m \u001B[33;03m options given to the visitor so that \"checkfirst\" is skipped.\u001B[39;00m\n\u001B[32m 2454\u001B[39m \n\u001B[32m 2455\u001B[39m \u001B[33;03m \"\"\"\u001B[39;00m\n\u001B[32m-> \u001B[39m\u001B[32m2456\u001B[39m \u001B[43mvisitorcallable\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mdialect\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m*\u001B[49m\u001B[43m*\u001B[49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\u001B[43m.\u001B[49m\u001B[43mtraverse_single\u001B[49m\u001B[43m(\u001B[49m\u001B[43melement\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/sql/visitors.py:664\u001B[39m, in \u001B[36mExternalTraversal.traverse_single\u001B[39m\u001B[34m(self, obj, **kw)\u001B[39m\n\u001B[32m 662\u001B[39m meth = \u001B[38;5;28mgetattr\u001B[39m(v, \u001B[33m\"\u001B[39m\u001B[33mvisit_\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[33m\"\u001B[39m % obj.__visit_name__, \u001B[38;5;28;01mNone\u001B[39;00m)\n\u001B[32m 663\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m meth:\n\u001B[32m--> \u001B[39m\u001B[32m664\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mmeth\u001B[49m\u001B[43m(\u001B[49m\u001B[43mobj\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m*\u001B[49m\u001B[43m*\u001B[49m\u001B[43mkw\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/sql/ddl.py:978\u001B[39m, in \u001B[36mSchemaGenerator.visit_metadata\u001B[39m\u001B[34m(self, metadata)\u001B[39m\n\u001B[32m 976\u001B[39m \u001B[38;5;28;01mfor\u001B[39;00m table, fkcs \u001B[38;5;129;01min\u001B[39;00m collection:\n\u001B[32m 977\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m table \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[32m--> \u001B[39m\u001B[32m978\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mtraverse_single\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 979\u001B[39m \u001B[43m \u001B[49m\u001B[43mtable\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 980\u001B[39m \u001B[43m \u001B[49m\u001B[43mcreate_ok\u001B[49m\u001B[43m=\u001B[49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[32m 981\u001B[39m \u001B[43m \u001B[49m\u001B[43minclude_foreign_key_constraints\u001B[49m\u001B[43m=\u001B[49m\u001B[43mfkcs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 982\u001B[39m \u001B[43m \u001B[49m\u001B[43m_is_metadata_operation\u001B[49m\u001B[43m=\u001B[49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[32m 983\u001B[39m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 984\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m 985\u001B[39m \u001B[38;5;28;01mfor\u001B[39;00m fkc \u001B[38;5;129;01min\u001B[39;00m fkcs:\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/sql/visitors.py:664\u001B[39m, in \u001B[36mExternalTraversal.traverse_single\u001B[39m\u001B[34m(self, obj, **kw)\u001B[39m\n\u001B[32m 662\u001B[39m meth = \u001B[38;5;28mgetattr\u001B[39m(v, \u001B[33m\"\u001B[39m\u001B[33mvisit_\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[33m\"\u001B[39m % obj.__visit_name__, \u001B[38;5;28;01mNone\u001B[39;00m)\n\u001B[32m 663\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m meth:\n\u001B[32m--> \u001B[39m\u001B[32m664\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mmeth\u001B[49m\u001B[43m(\u001B[49m\u001B[43mobj\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m*\u001B[49m\u001B[43m*\u001B[49m\u001B[43mkw\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/sql/ddl.py:1016\u001B[39m, in \u001B[36mSchemaGenerator.visit_table\u001B[39m\u001B[34m(self, table, create_ok, include_foreign_key_constraints, _is_metadata_operation)\u001B[39m\n\u001B[32m 1007\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m.dialect.supports_alter:\n\u001B[32m 1008\u001B[39m \u001B[38;5;66;03m# e.g., don't omit any foreign key constraints\u001B[39;00m\n\u001B[32m 1009\u001B[39m include_foreign_key_constraints = \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[32m 1011\u001B[39m \u001B[43mCreateTable\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 1012\u001B[39m \u001B[43m \u001B[49m\u001B[43mtable\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 1013\u001B[39m \u001B[43m \u001B[49m\u001B[43minclude_foreign_key_constraints\u001B[49m\u001B[43m=\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 1014\u001B[39m \u001B[43m \u001B[49m\u001B[43minclude_foreign_key_constraints\u001B[49m\n\u001B[32m 1015\u001B[39m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m-> \u001B[39m\u001B[32m1016\u001B[39m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\u001B[43m.\u001B[49m\u001B[43m_invoke_with\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mconnection\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 1018\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mhasattr\u001B[39m(table, \u001B[33m\"\u001B[39m\u001B[33mindexes\u001B[39m\u001B[33m\"\u001B[39m):\n\u001B[32m 1019\u001B[39m \u001B[38;5;28;01mfor\u001B[39;00m index \u001B[38;5;129;01min\u001B[39;00m table.indexes:\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/sql/ddl.py:314\u001B[39m, in \u001B[36mExecutableDDLElement._invoke_with\u001B[39m\u001B[34m(self, bind)\u001B[39m\n\u001B[32m 312\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34m_invoke_with\u001B[39m(\u001B[38;5;28mself\u001B[39m, bind):\n\u001B[32m 313\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m._should_execute(\u001B[38;5;28mself\u001B[39m.target, bind):\n\u001B[32m--> \u001B[39m\u001B[32m314\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mbind\u001B[49m\u001B[43m.\u001B[49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1416\u001B[39m, in \u001B[36mConnection.execute\u001B[39m\u001B[34m(self, statement, parameters, execution_options)\u001B[39m\n\u001B[32m 1414\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m exc.ObjectNotExecutableError(statement) \u001B[38;5;28;01mfrom\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01merr\u001B[39;00m\n\u001B[32m 1415\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m-> \u001B[39m\u001B[32m1416\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mmeth\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 1417\u001B[39m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[32m 1418\u001B[39m \u001B[43m \u001B[49m\u001B[43mdistilled_parameters\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 1419\u001B[39m \u001B[43m \u001B[49m\u001B[43mexecution_options\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mNO_OPTIONS\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 1420\u001B[39m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/sql/ddl.py:180\u001B[39m, in \u001B[36mExecutableDDLElement._execute_on_connection\u001B[39m\u001B[34m(self, connection, distilled_params, execution_options)\u001B[39m\n\u001B[32m 177\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34m_execute_on_connection\u001B[39m(\n\u001B[32m 178\u001B[39m \u001B[38;5;28mself\u001B[39m, connection, distilled_params, execution_options\n\u001B[32m 179\u001B[39m ):\n\u001B[32m--> \u001B[39m\u001B[32m180\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mconnection\u001B[49m\u001B[43m.\u001B[49m\u001B[43m_execute_ddl\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 181\u001B[39m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdistilled_params\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mexecution_options\u001B[49m\n\u001B[32m 182\u001B[39m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1527\u001B[39m, in \u001B[36mConnection._execute_ddl\u001B[39m\u001B[34m(self, ddl, distilled_parameters, execution_options)\u001B[39m\n\u001B[32m 1522\u001B[39m dialect = \u001B[38;5;28mself\u001B[39m.dialect\n\u001B[32m 1524\u001B[39m compiled = ddl.compile(\n\u001B[32m 1525\u001B[39m dialect=dialect, schema_translate_map=schema_translate_map\n\u001B[32m 1526\u001B[39m )\n\u001B[32m-> \u001B[39m\u001B[32m1527\u001B[39m ret = \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_execute_context\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 1528\u001B[39m \u001B[43m \u001B[49m\u001B[43mdialect\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 1529\u001B[39m \u001B[43m \u001B[49m\u001B[43mdialect\u001B[49m\u001B[43m.\u001B[49m\u001B[43mexecution_ctx_cls\u001B[49m\u001B[43m.\u001B[49m\u001B[43m_init_ddl\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 1530\u001B[39m \u001B[43m \u001B[49m\u001B[43mcompiled\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 1531\u001B[39m \u001B[43m \u001B[49m\u001B[38;5;28;43;01mNone\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[32m 1532\u001B[39m \u001B[43m \u001B[49m\u001B[43mexec_opts\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 1533\u001B[39m \u001B[43m \u001B[49m\u001B[43mcompiled\u001B[49m\u001B[43m,\u001B[49m\n\u001B[32m 1534\u001B[39m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 1535\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m._has_events \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m.engine._has_events:\n\u001B[32m 1536\u001B[39m \u001B[38;5;28mself\u001B[39m.dispatch.after_execute(\n\u001B[32m 1537\u001B[39m \u001B[38;5;28mself\u001B[39m,\n\u001B[32m 1538\u001B[39m ddl,\n\u001B[32m (...)\u001B[39m\u001B[32m 1542\u001B[39m ret,\n\u001B[32m 1543\u001B[39m )\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1843\u001B[39m, in \u001B[36mConnection._execute_context\u001B[39m\u001B[34m(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)\u001B[39m\n\u001B[32m 1841\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m._exec_insertmany_context(dialect, context)\n\u001B[32m 1842\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m-> \u001B[39m\u001B[32m1843\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_exec_single_context\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 1844\u001B[39m \u001B[43m \u001B[49m\u001B[43mdialect\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcontext\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mstatement\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparameters\u001B[49m\n\u001B[32m 1845\u001B[39m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1983\u001B[39m, in \u001B[36mConnection._exec_single_context\u001B[39m\u001B[34m(self, dialect, context, statement, parameters)\u001B[39m\n\u001B[32m 1980\u001B[39m result = context._setup_result_proxy()\n\u001B[32m 1982\u001B[39m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[32m-> \u001B[39m\u001B[32m1983\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_handle_dbapi_exception\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 1984\u001B[39m \u001B[43m \u001B[49m\u001B[43me\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mstr_statement\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43meffective_parameters\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcursor\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcontext\u001B[49m\n\u001B[32m 1985\u001B[39m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 1987\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m result\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:2352\u001B[39m, in \u001B[36mConnection._handle_dbapi_exception\u001B[39m\u001B[34m(self, e, statement, parameters, cursor, context, is_sub_exec)\u001B[39m\n\u001B[32m 2350\u001B[39m \u001B[38;5;28;01melif\u001B[39;00m should_wrap:\n\u001B[32m 2351\u001B[39m \u001B[38;5;28;01massert\u001B[39;00m sqlalchemy_exception \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[32m-> \u001B[39m\u001B[32m2352\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m sqlalchemy_exception.with_traceback(exc_info[\u001B[32m2\u001B[39m]) \u001B[38;5;28;01mfrom\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01me\u001B[39;00m\n\u001B[32m 2353\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m 2354\u001B[39m \u001B[38;5;28;01massert\u001B[39;00m exc_info[\u001B[32m1\u001B[39m] \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/base.py:1964\u001B[39m, in \u001B[36mConnection._exec_single_context\u001B[39m\u001B[34m(self, dialect, context, statement, parameters)\u001B[39m\n\u001B[32m 1962\u001B[39m \u001B[38;5;28;01mbreak\u001B[39;00m\n\u001B[32m 1963\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m evt_handled:\n\u001B[32m-> \u001B[39m\u001B[32m1964\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mdialect\u001B[49m\u001B[43m.\u001B[49m\u001B[43mdo_execute\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m 1965\u001B[39m \u001B[43m \u001B[49m\u001B[43mcursor\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mstr_statement\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43meffective_parameters\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcontext\u001B[49m\n\u001B[32m 1966\u001B[39m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 1968\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m._has_events \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m.engine._has_events:\n\u001B[32m 1969\u001B[39m \u001B[38;5;28mself\u001B[39m.dispatch.after_cursor_execute(\n\u001B[32m 1970\u001B[39m \u001B[38;5;28mself\u001B[39m,\n\u001B[32m 1971\u001B[39m cursor,\n\u001B[32m (...)\u001B[39m\u001B[32m 1975\u001B[39m context.executemany,\n\u001B[32m 1976\u001B[39m )\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/engine/default.py:942\u001B[39m, in \u001B[36mDefaultDialect.do_execute\u001B[39m\u001B[34m(self, cursor, statement, parameters, context)\u001B[39m\n\u001B[32m 941\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34mdo_execute\u001B[39m(\u001B[38;5;28mself\u001B[39m, cursor, statement, parameters, context=\u001B[38;5;28;01mNone\u001B[39;00m):\n\u001B[32m--> \u001B[39m\u001B[32m942\u001B[39m \u001B[43mcursor\u001B[49m\u001B[43m.\u001B[49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[43mstatement\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparameters\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py:172\u001B[39m, in \u001B[36mAsyncAdapt_aiosqlite_cursor.execute\u001B[39m\u001B[34m(self, operation, parameters)\u001B[39m\n\u001B[32m 170\u001B[39m \u001B[38;5;28mself\u001B[39m._cursor = _cursor\n\u001B[32m 171\u001B[39m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n\u001B[32m--> \u001B[39m\u001B[32m172\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_adapt_connection\u001B[49m\u001B[43m.\u001B[49m\u001B[43m_handle_exception\u001B[49m\u001B[43m(\u001B[49m\u001B[43merror\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py:323\u001B[39m, in \u001B[36mAsyncAdapt_aiosqlite_connection._handle_exception\u001B[39m\u001B[34m(self, error)\u001B[39m\n\u001B[32m 319\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;28mself\u001B[39m.dbapi.sqlite.OperationalError(\n\u001B[32m 320\u001B[39m \u001B[33m\"\u001B[39m\u001B[33mno active connection\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m 321\u001B[39m ) \u001B[38;5;28;01mfrom\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01merror\u001B[39;00m\n\u001B[32m 322\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m--> \u001B[39m\u001B[32m323\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m error\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/dialects/sqlite/aiosqlite.py:154\u001B[39m, in \u001B[36mAsyncAdapt_aiosqlite_cursor.execute\u001B[39m\u001B[34m(self, operation, parameters)\u001B[39m\n\u001B[32m 152\u001B[39m \u001B[38;5;28mself\u001B[39m.await_(_cursor.execute(operation))\n\u001B[32m 153\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[32m--> \u001B[39m\u001B[32m154\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mawait_\u001B[49m\u001B[43m(\u001B[49m\u001B[43m_cursor\u001B[49m\u001B[43m.\u001B[49m\u001B[43mexecute\u001B[49m\u001B[43m(\u001B[49m\u001B[43moperation\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mparameters\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 156\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m _cursor.description:\n\u001B[32m 157\u001B[39m \u001B[38;5;28mself\u001B[39m.description = _cursor.description\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py:132\u001B[39m, in \u001B[36mawait_only\u001B[39m\u001B[34m(awaitable)\u001B[39m\n\u001B[32m 123\u001B[39m \u001B[38;5;28;01mraise\u001B[39;00m exc.MissingGreenlet(\n\u001B[32m 124\u001B[39m \u001B[33m\"\u001B[39m\u001B[33mgreenlet_spawn has not been called; can\u001B[39m\u001B[33m'\u001B[39m\u001B[33mt call await_only() \u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m 125\u001B[39m \u001B[33m\"\u001B[39m\u001B[33mhere. Was IO attempted in an unexpected place?\u001B[39m\u001B[33m\"\u001B[39m\n\u001B[32m 126\u001B[39m )\n\u001B[32m 128\u001B[39m \u001B[38;5;66;03m# returns the control to the driver greenlet passing it\u001B[39;00m\n\u001B[32m 129\u001B[39m \u001B[38;5;66;03m# a coroutine to run. Once the awaitable is done, the driver greenlet\u001B[39;00m\n\u001B[32m 130\u001B[39m \u001B[38;5;66;03m# switches back to this greenlet with the result of awaitable that is\u001B[39;00m\n\u001B[32m 131\u001B[39m \u001B[38;5;66;03m# then returned to the caller (or raised as error)\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m132\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mcurrent\u001B[49m\u001B[43m.\u001B[49m\u001B[43mparent\u001B[49m\u001B[43m.\u001B[49m\u001B[43mswitch\u001B[49m\u001B[43m(\u001B[49m\u001B[43mawaitable\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/sqlalchemy/util/_concurrency_py3k.py:196\u001B[39m, in \u001B[36mgreenlet_spawn\u001B[39m\u001B[34m(fn, _require_await, *args, **kwargs)\u001B[39m\n\u001B[32m 192\u001B[39m switch_occurred = \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[32m 193\u001B[39m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[32m 194\u001B[39m \u001B[38;5;66;03m# wait for a coroutine from await_only and then return its\u001B[39;00m\n\u001B[32m 195\u001B[39m \u001B[38;5;66;03m# result back to it.\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m196\u001B[39m value = \u001B[38;5;28;01mawait\u001B[39;00m result\n\u001B[32m 197\u001B[39m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mBaseException\u001B[39;00m:\n\u001B[32m 198\u001B[39m \u001B[38;5;66;03m# this allows an exception to be raised within\u001B[39;00m\n\u001B[32m 199\u001B[39m \u001B[38;5;66;03m# the moderated greenlet so that it can continue\u001B[39;00m\n\u001B[32m 200\u001B[39m \u001B[38;5;66;03m# its expected flow.\u001B[39;00m\n\u001B[32m 201\u001B[39m result = context.throw(*sys.exc_info())\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/aiosqlite/cursor.py:48\u001B[39m, in \u001B[36mCursor.execute\u001B[39m\u001B[34m(self, sql, parameters)\u001B[39m\n\u001B[32m 46\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m parameters \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[32m 47\u001B[39m parameters = []\n\u001B[32m---> \u001B[39m\u001B[32m48\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m \u001B[38;5;28mself\u001B[39m._execute(\u001B[38;5;28mself\u001B[39m._cursor.execute, sql, parameters)\n\u001B[32m 49\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/aiosqlite/cursor.py:40\u001B[39m, in \u001B[36mCursor._execute\u001B[39m\u001B[34m(self, fn, *args, **kwargs)\u001B[39m\n\u001B[32m 38\u001B[39m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34m_execute\u001B[39m(\u001B[38;5;28mself\u001B[39m, fn, *args, **kwargs):\n\u001B[32m 39\u001B[39m \u001B[38;5;250m \u001B[39m\u001B[33;03m\"\"\"Execute the given function on the shared connection's thread.\"\"\"\u001B[39;00m\n\u001B[32m---> \u001B[39m\u001B[32m40\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m \u001B[38;5;28mself\u001B[39m._conn._execute(fn, *args, **kwargs)\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/aiosqlite/core.py:132\u001B[39m, in \u001B[36mConnection._execute\u001B[39m\u001B[34m(self, fn, *args, **kwargs)\u001B[39m\n\u001B[32m 128\u001B[39m future = asyncio.get_event_loop().create_future()\n\u001B[32m 130\u001B[39m \u001B[38;5;28mself\u001B[39m._tx.put_nowait((future, function))\n\u001B[32m--> \u001B[39m\u001B[32m132\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m future\n",
"\u001B[36mFile \u001B[39m\u001B[32m~/cognee/.venv/lib/python3.11/site-packages/aiosqlite/core.py:115\u001B[39m, in \u001B[36mConnection.run\u001B[39m\u001B[34m(self)\u001B[39m\n\u001B[32m 113\u001B[39m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[32m 114\u001B[39m LOG.debug(\u001B[33m\"\u001B[39m\u001B[33mexecuting \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[33m\"\u001B[39m, function)\n\u001B[32m--> \u001B[39m\u001B[32m115\u001B[39m result = \u001B[43mfunction\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 116\u001B[39m LOG.debug(\u001B[33m\"\u001B[39m\u001B[33moperation \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[33m completed\u001B[39m\u001B[33m\"\u001B[39m, function)\n\u001B[32m 117\u001B[39m future.get_loop().call_soon_threadsafe(set_result, future, result)\n",
"\u001B[31mOperationalError\u001B[39m: (sqlite3.OperationalError) database is locked\n[SQL: \nCREATE TABLE data (\n\tid UUID NOT NULL, \n\tname VARCHAR, \n\textension VARCHAR, \n\tmime_type VARCHAR, \n\traw_data_location VARCHAR, \n\towner_id UUID, \n\tcontent_hash VARCHAR, \n\texternal_metadata JSON, \n\tnode_set JSON, \n\ttoken_count INTEGER, \n\tcreated_at DATETIME, \n\tupdated_at DATETIME, \n\tPRIMARY KEY (id)\n)\n\n]\n(Background on this error at: https://sqlalche.me/e/20/e3q8)"
]
}
],
"execution_count": 3
]
},
{
"cell_type": "markdown",
@ -203,12 +336,39 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-30T11:44:56.372628Z",
"start_time": "2025-06-30T11:44:55.978258Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\u001b[2m2025-08-27T14:36:09.273837\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mStarting completion generation for query: 'What is in the multimedia files?'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:36:09.275355\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mStarting summary retrieval for query: 'What is in the multimedia files?'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:36:09.691101\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFound 2 summaries from vector search\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:36:09.691827\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReturning 2 summary payloads \u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:36:09.692207\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReturning context with 2 item(s)\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'id': 'facab42e-12fc-557e-aaf4-09c02ae1cd4f', 'created_at': 1756305273061, 'updated_at': 1756305273061, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': \"Programmers won't change a light bulb — it's considered a hardware issue.\"}\n",
"{'id': '958f2bc9-060b-5500-b14a-19b300cc99aa', 'created_at': 1756305311791, 'updated_at': 1756305311791, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': 'One-line programmer joke: changing a light bulb is labeled a hardware issue.'}\n"
]
}
],
"source": [
"from cognee.api.v1.search import SearchType\n",
"\n",
@ -221,25 +381,36 @@
"# Display search results\n",
"for result_text in search_results:\n",
" print(result_text)"
],
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'id': '3b530220-7e7c-52a2-8b62-ce5adce1a46c', 'created_at': 1751283883122, 'updated_at': 1751283883122, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': \"The joke queries the number of programmers required to change a light bulb and answers, 'None. Thats a hardware issue.' This humor highlights the divide between software and hardware challenges in programming.\"}\n",
"{'id': '128eb96e-fd36-53ef-ab6d-d4884ecbfee9', 'created_at': 1751283883122, 'updated_at': 1751283883122, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': \"Changing a light bulb doesn't require programmers.\"}\n"
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"execution_count": 5
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": ""
"source": [
"# Only exit in interactive mode, not during GitHub Actions\n",
"import os\n",
"\n",
"# Skip exit if we're running in GitHub Actions\n",
"if not os.environ.get('GITHUB_ACTIONS'):\n",
" print(\"Exiting kernel to clean up resources...\")\n",
" os._exit(0)\n",
"else:\n",
" print(\"Skipping kernel exit - running in GitHub Actions\")"
]
}
],
"metadata": {
@ -258,7 +429,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.12.7"
}
},
"nbformat": 4,

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

1968
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
[project]
name = "cognee"
version = "0.2.3.dev1"
version = "0.2.4"
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
authors = [
{ name = "Vasilije Markovic" },
@ -34,7 +34,7 @@ dependencies = [
"sqlalchemy>=2.0.39,<3.0.0",
"aiosqlite>=0.20.0,<1.0.0",
"tiktoken>=0.8.0,<1.0.0",
"litellm>=1.57.4, <1.71.0",
"litellm>=1.71.0, <2.0.0",
"instructor>=1.9.1,<2.0.0",
"langfuse>=2.32.0,<3",
"filetype>=1.2.0,<2.0.0",
@ -60,7 +60,8 @@ dependencies = [
"pympler>=1.1,<2.0.0",
"onnxruntime>=1.0.0,<2.0.0",
"pylance>=0.22.0,<1.0.0",
"kuzu (==0.11.0)"
"kuzu (==0.11.0)",
"python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
]
[project.optional-dependencies]
@ -145,6 +146,9 @@ debug = ["debugpy>=1.8.9,<2.0.0"]
Homepage = "https://www.cognee.ai"
Repository = "https://github.com/topoteretes/cognee"
[project.scripts]
cognee = "cognee.cli._cognee:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
@ -186,4 +190,4 @@ exclude = [
]
[tool.ruff.lint]
ignore = ["F401"]
ignore = ["F401"]

7673
uv.lock generated

File diff suppressed because it is too large Load diff