Merge branch 'dev' into feature/ontology-param-rest

This commit is contained in:
hajdul88 2025-11-06 09:40:04 +01:00 committed by GitHub
commit cc058b4482
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
74 changed files with 2031 additions and 509 deletions

View file

@ -169,8 +169,9 @@ REQUIRE_AUTHENTICATION=False
# Vector: LanceDB
# Graph: KuzuDB
#
# It enforces LanceDB and KuzuDB use and uses them to create databases per Cognee user + dataset
ENABLE_BACKEND_ACCESS_CONTROL=False
# It enforces creation of databases per Cognee user + dataset. Does not work with some graph and database providers.
# Disable this mode when using unsupported graph/vector databases.
ENABLE_BACKEND_ACCESS_CONTROL=True
################################################################################
# ☁️ Cloud Sync Settings
@ -242,13 +243,14 @@ LITELLM_LOG="ERROR"
########## Local LLM via Ollama ###############################################
#LLM_API_KEY ="ollama"
#LLM_MODEL="llama3.1:8b"
#LLM_PROVIDER="ollama"
#LLM_ENDPOINT="http://localhost:11434/v1"
#EMBEDDING_PROVIDER="ollama"
#EMBEDDING_MODEL="nomic-embed-text:latest"
#EMBEDDING_ENDPOINT="http://localhost:11434/api/embeddings"
#EMBEDDING_ENDPOINT="http://localhost:11434/api/embed"
#EMBEDDING_DIMENSIONS=768
#HUGGINGFACE_TOKENIZER="nomic-ai/nomic-embed-text-v1.5"

View file

@ -7,14 +7,29 @@ on:
jobs:
docker-build-and-push:
runs-on: ubuntu-latest
runs-on:
group: Default
labels:
- docker_build_runner
steps:
- name: Check and free disk space before build
run: |
echo "=== Before cleanup ==="
df -h
echo "Removing unused preinstalled SDKs to free space..."
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true
docker system prune -af || true
echo "=== After cleanup ==="
df -h
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
buildkitd-flags: --root /tmp/buildkit
- name: Log in to Docker Hub
uses: docker/login-action@v3
@ -34,7 +49,7 @@ jobs:
- name: Build and push
id: build
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
with:
context: .
platforms: linux/amd64,linux/arm64
@ -45,5 +60,6 @@ jobs:
cache-from: type=registry,ref=cognee/cognee-mcp:buildcache
cache-to: type=registry,ref=cognee/cognee-mcp:buildcache,mode=max
- name: Image digest
run: echo ${{ steps.build.outputs.digest }}

View file

@ -447,3 +447,44 @@ jobs:
DB_USERNAME: cognee
DB_PASSWORD: cognee
run: uv run python ./cognee/tests/test_conversation_history.py
test-load:
name: Test Load
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
extra-dependencies: "aws"
- name: Set File Descriptor Limit
run: sudo prlimit --pid $$ --nofile=4096:4096
- name: Verify File Descriptor Limit
run: ulimit -n
- name: Dependencies already installed
run: echo "Dependencies already installed in setup"
- name: Run Load Test
env:
ENV: 'dev'
ENABLE_BACKEND_ACCESS_CONTROL: True
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
STORAGE_BACKEND: s3
AWS_REGION: eu-west-1
AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }}
run: uv run python ./cognee/tests/test_load.py

View file

@ -210,6 +210,31 @@ jobs:
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./examples/python/memify_coding_agent_example.py
test-custom-pipeline:
name: Run Custom Pipeline Example
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
- name: Run Custom Pipeline Example
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: uv run python ./examples/python/run_custom_pipeline_example.py
test-permissions-example:
name: Run Permissions Example
runs-on: ubuntu-22.04

View file

@ -84,6 +84,7 @@ jobs:
GRAPH_DATABASE_PROVIDER: 'neo4j'
VECTOR_DB_PROVIDER: 'lancedb'
DB_PROVIDER: 'sqlite'
ENABLE_BACKEND_ACCESS_CONTROL: 'false'
GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
@ -135,6 +136,7 @@ jobs:
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
GRAPH_DATABASE_PROVIDER: 'kuzu'
VECTOR_DB_PROVIDER: 'pgvector'
ENABLE_BACKEND_ACCESS_CONTROL: 'false'
DB_PROVIDER: 'postgres'
DB_NAME: 'cognee_db'
DB_HOST: '127.0.0.1'
@ -197,6 +199,7 @@ jobs:
GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
ENABLE_BACKEND_ACCESS_CONTROL: 'false'
DB_NAME: cognee_db
DB_HOST: 127.0.0.1
DB_PORT: 5432

View file

@ -10,6 +10,10 @@ on:
required: false
type: string
default: '["3.10.x", "3.12.x", "3.13.x"]'
os:
required: false
type: string
default: '["ubuntu-22.04", "macos-15", "windows-latest"]'
secrets:
LLM_PROVIDER:
required: true
@ -40,10 +44,11 @@ jobs:
run-unit-tests:
name: Unit tests ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ubuntu-22.04, macos-15, windows-latest]
os: ${{ fromJSON(inputs.os) }}
fail-fast: false
steps:
- name: Check out
@ -76,10 +81,11 @@ jobs:
run-integration-tests:
name: Integration tests ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-15, windows-latest ]
os: ${{ fromJSON(inputs.os) }}
fail-fast: false
steps:
- name: Check out
@ -112,10 +118,11 @@ jobs:
run-library-test:
name: Library test ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-15, windows-latest ]
os: ${{ fromJSON(inputs.os) }}
fail-fast: false
steps:
- name: Check out
@ -148,10 +155,11 @@ jobs:
run-build-test:
name: Build test ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-15, windows-latest ]
os: ${{ fromJSON(inputs.os) }}
fail-fast: false
steps:
- name: Check out
@ -177,10 +185,11 @@ jobs:
run-soft-deletion-test:
name: Soft Delete test ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-15, windows-latest ]
os: ${{ fromJSON(inputs.os) }}
fail-fast: false
steps:
- name: Check out
@ -214,10 +223,11 @@ jobs:
run-hard-deletion-test:
name: Hard Delete test ${{ matrix.python-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}
os: [ ubuntu-22.04, macos-15, windows-latest ]
os: ${{ fromJSON(inputs.os) }}
fail-fast: false
steps:
- name: Check out

View file

@ -75,7 +75,7 @@ jobs:
{ "role": "user", "content": "Whatever I say, answer with Yes." }
]
}'
curl -X POST http://127.0.0.1:11434/v1/embeddings \
curl -X POST http://127.0.0.1:11434/api/embed \
-H "Content-Type: application/json" \
-d '{
"model": "avr/sfr-embedding-mistral:latest",
@ -98,7 +98,7 @@ jobs:
LLM_MODEL: "phi4"
EMBEDDING_PROVIDER: "ollama"
EMBEDDING_MODEL: "avr/sfr-embedding-mistral:latest"
EMBEDDING_ENDPOINT: "http://localhost:11434/api/embeddings"
EMBEDDING_ENDPOINT: "http://localhost:11434/api/embed"
EMBEDDING_DIMENSIONS: "4096"
HUGGINGFACE_TOKENIZER: "Salesforce/SFR-Embedding-Mistral"
run: uv run python ./examples/python/simple_example.py

View file

@ -1,4 +1,6 @@
name: Test Suites
permissions:
contents: read
on:
push:
@ -80,12 +82,22 @@ jobs:
uses: ./.github/workflows/notebooks_tests.yml
secrets: inherit
different-operating-systems-tests:
name: Operating System and Python Tests
different-os-tests-basic:
name: OS and Python Tests Ubuntu
needs: [basic-tests, e2e-tests]
uses: ./.github/workflows/test_different_operating_systems.yml
with:
python-versions: '["3.10.x", "3.11.x", "3.12.x", "3.13.x"]'
os: '["ubuntu-22.04"]'
secrets: inherit
different-os-tests-extended:
name: OS and Python Tests Extended
needs: [basic-tests, e2e-tests]
uses: ./.github/workflows/test_different_operating_systems.yml
with:
python-versions: '["3.13.x"]'
os: '["macos-15", "windows-latest"]'
secrets: inherit
# Matrix-based vector database tests
@ -135,7 +147,8 @@ jobs:
e2e-tests,
graph-db-tests,
notebook-tests,
different-operating-systems-tests,
different-os-tests-basic,
different-os-tests-extended,
vector-db-tests,
example-tests,
llm-tests,
@ -155,7 +168,8 @@ jobs:
cli-tests,
graph-db-tests,
notebook-tests,
different-operating-systems-tests,
different-os-tests-basic,
different-os-tests-extended,
vector-db-tests,
example-tests,
db-examples-tests,
@ -176,7 +190,8 @@ jobs:
"${{ needs.cli-tests.result }}" == "success" &&
"${{ needs.graph-db-tests.result }}" == "success" &&
"${{ needs.notebook-tests.result }}" == "success" &&
"${{ needs.different-operating-systems-tests.result }}" == "success" &&
"${{ needs.different-os-tests-basic.result }}" == "success" &&
"${{ needs.different-os-tests-extended.result }}" == "success" &&
"${{ needs.vector-db-tests.result }}" == "success" &&
"${{ needs.example-tests.result }}" == "success" &&
"${{ needs.db-examples-tests.result }}" == "success" &&

132
AGENTS.md Normal file
View file

@ -0,0 +1,132 @@
## Repository Guidelines
This document summarizes how to work with the cognee repository: how it's organized, how to build, test, lint, and contribute. It mirrors our actual tooling and CI while providing quick commands for local development.
## Project Structure & Module Organization
- `cognee/`: Core Python library and API.
- `api/`: FastAPI application and versioned routers (add, cognify, memify, search, delete, users, datasets, responses, visualize, settings, sync, update, checks).
- `cli/`: CLI entry points and subcommands invoked via `cognee` / `cognee-cli`.
- `infrastructure/`: Databases, LLM providers, embeddings, loaders, and storage adapters.
- `modules/`: Domain logic (graph, retrieval, ontology, users, processing, observability, etc.).
- `tasks/`: Reusable tasks (e.g., code graph, web scraping, storage). Extend with new tasks here.
- `eval_framework/`: Evaluation utilities and adapters.
- `shared/`: Cross-cutting helpers (logging, settings, utils).
- `tests/`: Unit, integration, CLI, and end-to-end tests organized by feature.
- `__main__.py`: Entrypoint to route to CLI.
- `cognee-mcp/`: Model Context Protocol server exposing cognee as MCP tools (SSE/HTTP/stdio). Contains its own README and Dockerfile.
- `cognee-frontend/`: Next.js UI for local development and demos.
- `distributed/`: Utilities for distributed execution (Modal, workers, queues).
- `examples/`: Example scripts demonstrating the public APIs and features (graph, code graph, multimodal, permissions, etc.).
- `notebooks/`: Jupyter notebooks for demos and tutorials.
- `alembic/`: Database migrations for relational backends.
Notes:
- Co-locate feature-specific helpers under their respective package (`modules/`, `infrastructure/`, or `tasks/`).
- Extend the system by adding new tasks, loaders, or retrievers rather than modifying core pipeline mechanisms.
## Build, Test, and Development Commands
Python (root) requires Python >= 3.10 and < 3.14. We recommend `uv` for speed and reproducibility.
- Create/refresh env and install dev deps:
```bash
uv sync --dev --all-extras --reinstall
```
- Run the CLI (examples):
```bash
uv run cognee-cli add "Cognee turns documents into AI memory."
uv run cognee-cli cognify
uv run cognee-cli search "What does cognee do?"
uv run cognee-cli -ui # Launches UI, backend API, and MCP server together
```
- Start the FastAPI server directly:
```bash
uv run python -m cognee.api.client
```
- Run tests (CI mirrors these commands):
```bash
uv run pytest cognee/tests/unit/ -v
uv run pytest cognee/tests/integration/ -v
```
- Lint and format (ruff):
```bash
uv run ruff check .
uv run ruff format .
```
- Optional static type checks (mypy):
```bash
uv run mypy cognee/
```
MCP Server (`cognee-mcp/`):
- Install and run locally:
```bash
cd cognee-mcp
uv sync --dev --all-extras --reinstall
uv run python src/server.py # stdio (default)
uv run python src/server.py --transport sse
uv run python src/server.py --transport http --host 127.0.0.1 --port 8000 --path /mcp
```
- API Mode (connect to a running Cognee API):
```bash
uv run python src/server.py --transport sse --api-url http://localhost:8000 --api-token YOUR_TOKEN
```
- Docker quickstart (examples): see `cognee-mcp/README.md` for full details
```bash
docker run -e TRANSPORT_MODE=http --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main
```
Frontend (`cognee-frontend/`):
```bash
cd cognee-frontend
npm install
npm run dev # Next.js dev server
npm run lint # ESLint
npm run build && npm start
```
## Coding Style & Naming Conventions
Python:
- 4-space indentation, modules and functions in `snake_case`, classes in `PascalCase`.
- Public APIs should be type-annotated where practical.
- Use `ruff format` before committing; `ruff check` enforces import hygiene and style (line-length 100 configured in `pyproject.toml`).
- Prefer explicit, structured error handling. Use shared logging utilities in `cognee.shared.logging_utils`.
MCP server and Frontend:
- Follow the local `README.md` and ESLint/TypeScript configuration in `cognee-frontend/`.
## Testing Guidelines
- Place Python tests under `cognee/tests/`.
- Unit tests: `cognee/tests/unit/`
- Integration tests: `cognee/tests/integration/`
- CLI tests: `cognee/tests/cli_tests/`
- Name test files `test_*.py`. Use `pytest.mark.asyncio` for async tests.
- Avoid external state; rely on test fixtures and the CI-provided env vars when LLM/embedding providers are required. See CI workflows under `.github/workflows/` for expected environment variables.
- When adding public APIs, provide/update targeted examples under `examples/python/`.
## Commit & Pull Request Guidelines
- Use clear, imperative subjects (≤ 72 chars) and conventional commit styling in PR titles. Our CI validates semantic PR titles (see `.github/workflows/pr_lint`). Examples:
- `feat(graph): add temporal edge weighting`
- `fix(api): handle missing auth cookie`
- `docs: update installation instructions`
- Reference related issues/discussions in the PR body and provide brief context.
- PRs should describe scope, list local test commands run, and mention any impacts on MCP server or UI if applicable.
- Sign commits and affirm the DCO (see `CONTRIBUTING.md`).
## CI Mirrors Local Commands
Our GitHub Actions run the same ruff checks and pytest suites shown above (`.github/workflows/basic_tests.yml` and related workflows). Use the commands in this document locally to minimize CI surprises.

View file

@ -97,7 +97,7 @@ Hosted platform:
### 📦 Installation
You can install Cognee using either **pip**, **poetry**, **uv** or any other python package manager.
You can install Cognee using either **pip**, **poetry**, **uv** or any other python package manager.
Cognee supports Python 3.10 to 3.12

View file

@ -110,6 +110,47 @@ If you'd rather run cognee-mcp in a container, you have two options:
# For stdio transport (default)
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
```
**Installing optional dependencies at runtime:**
You can install optional dependencies when running the container by setting the `EXTRAS` environment variable:
```bash
# Install a single optional dependency group at runtime
docker run \
-e TRANSPORT_MODE=http \
-e EXTRAS=aws \
--env-file ./.env \
-p 8000:8000 \
--rm -it cognee/cognee-mcp:main
# Install multiple optional dependency groups at runtime (comma-separated)
docker run \
-e TRANSPORT_MODE=sse \
-e EXTRAS=aws,postgres,neo4j \
--env-file ./.env \
-p 8000:8000 \
--rm -it cognee/cognee-mcp:main
```
**Available optional dependency groups:**
- `aws` - S3 storage support
- `postgres` / `postgres-binary` - PostgreSQL database support
- `neo4j` - Neo4j graph database support
- `neptune` - AWS Neptune support
- `chromadb` - ChromaDB vector store support
- `scraping` - Web scraping capabilities
- `distributed` - Modal distributed execution
- `langchain` - LangChain integration
- `llama-index` - LlamaIndex integration
- `anthropic` - Anthropic models
- `groq` - Groq models
- `mistral` - Mistral models
- `ollama` / `huggingface` - Local model support
- `docs` - Document processing
- `codegraph` - Code analysis
- `monitoring` - Sentry & Langfuse monitoring
- `redis` - Redis support
- And more (see [pyproject.toml](https://github.com/topoteretes/cognee/blob/main/pyproject.toml) for full list)
2. **Pull from Docker Hub** (no build required):
```bash
# With HTTP transport (recommended for web deployments)
@ -119,6 +160,17 @@ If you'd rather run cognee-mcp in a container, you have two options:
# With stdio transport (default)
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
```
**With runtime installation of optional dependencies:**
```bash
# Install optional dependencies from Docker Hub image
docker run \
-e TRANSPORT_MODE=http \
-e EXTRAS=aws,postgres \
--env-file ./.env \
-p 8000:8000 \
--rm -it cognee/cognee-mcp:main
```
### **Important: Docker vs Direct Usage**
**Docker uses environment variables**, not command line arguments:

View file

@ -4,6 +4,42 @@ set -e # Exit on error
echo "Debug mode: $DEBUG"
echo "Environment: $ENVIRONMENT"
# Install optional dependencies if EXTRAS is set
if [ -n "$EXTRAS" ]; then
echo "Installing optional dependencies: $EXTRAS"
# Get the cognee version that's currently installed
COGNEE_VERSION=$(uv pip show cognee | grep "Version:" | awk '{print $2}')
echo "Current cognee version: $COGNEE_VERSION"
# Build the extras list for cognee
IFS=',' read -ra EXTRA_ARRAY <<< "$EXTRAS"
# Combine base extras from pyproject.toml with requested extras
ALL_EXTRAS=""
for extra in "${EXTRA_ARRAY[@]}"; do
# Trim whitespace
extra=$(echo "$extra" | xargs)
# Add to extras list if not already present
if [[ ! "$ALL_EXTRAS" =~ (^|,)"$extra"(,|$) ]]; then
if [ -z "$ALL_EXTRAS" ]; then
ALL_EXTRAS="$extra"
else
ALL_EXTRAS="$ALL_EXTRAS,$extra"
fi
fi
done
echo "Installing cognee with extras: $ALL_EXTRAS"
echo "Running: uv pip install 'cognee[$ALL_EXTRAS]==$COGNEE_VERSION'"
uv pip install "cognee[$ALL_EXTRAS]==$COGNEE_VERSION"
# Verify installation
echo ""
echo "✓ Optional dependencies installation completed"
else
echo "No optional dependencies specified"
fi
# Set default transport mode if not specified
TRANSPORT_MODE=${TRANSPORT_MODE:-"stdio"}
echo "Transport mode: $TRANSPORT_MODE"

View file

@ -9,7 +9,7 @@ dependencies = [
# For local cognee repo usage remove comment below and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
#"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/igorilic/Desktop/cognee",
# TODO: Remove gemini from optional dependencies for new Cognee version after 0.3.4
"cognee[postgres,codegraph,huggingface,docs,neo4j]==0.3.7",
"cognee[postgres,docs,neo4j]==0.3.7",
"fastmcp>=2.10.0,<3.0.0",
"mcp>=1.12.0,<2.0.0",
"uv>=0.6.3,<1.0.0",

View file

@ -37,12 +37,10 @@ async def run():
toolResult = await session.call_tool("prune", arguments={})
toolResult = await session.call_tool(
"codify", arguments={"repo_path": "SOME_REPO_PATH"}
)
toolResult = await session.call_tool("cognify", arguments={})
toolResult = await session.call_tool(
"search", arguments={"search_type": "CODE", "search_query": "exceptions"}
"search", arguments={"search_type": "GRAPH_COMPLETION"}
)
print(f"Cognify result: {toolResult.content}")

64
cognee-mcp/uv.lock generated
View file

@ -718,19 +718,10 @@ wheels = [
]
[package.optional-dependencies]
codegraph = [
{ name = "fastembed", marker = "python_full_version < '3.13'" },
{ name = "transformers" },
{ name = "tree-sitter" },
{ name = "tree-sitter-python" },
]
docs = [
{ name = "lxml" },
{ name = "unstructured", extra = ["csv", "doc", "docx", "epub", "md", "odt", "org", "pdf", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"] },
]
huggingface = [
{ name = "transformers" },
]
neo4j = [
{ name = "neo4j" },
]
@ -745,7 +736,7 @@ name = "cognee-mcp"
version = "0.4.0"
source = { editable = "." }
dependencies = [
{ name = "cognee", extra = ["codegraph", "docs", "huggingface", "neo4j", "postgres"] },
{ name = "cognee", extra = ["docs", "neo4j", "postgres"] },
{ name = "fastmcp" },
{ name = "httpx" },
{ name = "mcp" },
@ -759,7 +750,7 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "cognee", extras = ["postgres", "codegraph", "huggingface", "docs", "neo4j"], specifier = "==0.3.7" },
{ name = "cognee", extras = ["postgres", "docs", "neo4j"], specifier = "==0.3.7" },
{ name = "fastmcp", specifier = ">=2.10.0,<3.0.0" },
{ name = "httpx", specifier = ">=0.27.0,<1.0.0" },
{ name = "mcp", specifier = ">=1.12.0,<2.0.0" },
@ -6038,57 +6029,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/2b/4d2708ac1ff5cd708b6548f4c5812d0ae40d1c28591c4c1c762b6dbdef2d/transformers-4.57.0-py3-none-any.whl", hash = "sha256:9d7c6d098c026e40d897e017ed1f481ab803cbac041021dbc6ae6100e4949b55", size = 11990588 },
]
[[package]]
name = "tree-sitter"
version = "0.24.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a7/a2/698b9d31d08ad5558f8bfbfe3a0781bd4b1f284e89bde3ad18e05101a892/tree-sitter-0.24.0.tar.gz", hash = "sha256:abd95af65ca2f4f7eca356343391ed669e764f37748b5352946f00f7fc78e734", size = 168304 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/08/9a/bd627a02e41671af73222316e1fcf87772c7804dc2fba99405275eb1f3eb/tree_sitter-0.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f3f00feff1fc47a8e4863561b8da8f5e023d382dd31ed3e43cd11d4cae445445", size = 140890 },
{ url = "https://files.pythonhosted.org/packages/5b/9b/b1ccfb187f8be78e2116176a091a2f2abfd043a06d78f80c97c97f315b37/tree_sitter-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f9691be48d98c49ef8f498460278884c666b44129222ed6217477dffad5d4831", size = 134413 },
{ url = "https://files.pythonhosted.org/packages/01/39/e25b0042a049eb27e991133a7aa7c49bb8e49a8a7b44ca34e7e6353ba7ac/tree_sitter-0.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:098a81df9f89cf254d92c1cd0660a838593f85d7505b28249216661d87adde4a", size = 560427 },
{ url = "https://files.pythonhosted.org/packages/1c/59/4d132f1388da5242151b90acf32cc56af779bfba063923699ab28b276b62/tree_sitter-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b26bf9e958da6eb7e74a081aab9d9c7d05f9baeaa830dbb67481898fd16f1f5", size = 574327 },
{ url = "https://files.pythonhosted.org/packages/ec/97/3914e45ab9e0ff0f157e493caa91791372508488b97ff0961a0640a37d25/tree_sitter-0.24.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2a84ff87a2f2a008867a1064aba510ab3bd608e3e0cd6e8fef0379efee266c73", size = 577171 },
{ url = "https://files.pythonhosted.org/packages/c5/b0/266a529c3eef171137b73cde8ad7aa282734354609a8b2f5564428e8f12d/tree_sitter-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:c012e4c345c57a95d92ab5a890c637aaa51ab3b7ff25ed7069834b1087361c95", size = 120260 },
{ url = "https://files.pythonhosted.org/packages/c1/c3/07bfaa345e0037ff75d98b7a643cf940146e4092a1fd54eed0359836be03/tree_sitter-0.24.0-cp310-cp310-win_arm64.whl", hash = "sha256:033506c1bc2ba7bd559b23a6bdbeaf1127cee3c68a094b82396718596dfe98bc", size = 108416 },
{ url = "https://files.pythonhosted.org/packages/66/08/82aaf7cbea7286ee2a0b43e9b75cb93ac6ac132991b7d3c26ebe5e5235a3/tree_sitter-0.24.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de0fb7c18c6068cacff46250c0a0473e8fc74d673e3e86555f131c2c1346fb13", size = 140733 },
{ url = "https://files.pythonhosted.org/packages/8c/bd/1a84574911c40734d80327495e6e218e8f17ef318dd62bb66b55c1e969f5/tree_sitter-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a7c9c89666dea2ce2b2bf98e75f429d2876c569fab966afefdcd71974c6d8538", size = 134243 },
{ url = "https://files.pythonhosted.org/packages/46/c1/c2037af2c44996d7bde84eb1c9e42308cc84b547dd6da7f8a8bea33007e1/tree_sitter-0.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ddb113e6b8b3e3b199695b1492a47d87d06c538e63050823d90ef13cac585fd", size = 562030 },
{ url = "https://files.pythonhosted.org/packages/4c/aa/2fb4d81886df958e6ec7e370895f7106d46d0bbdcc531768326124dc8972/tree_sitter-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01ea01a7003b88b92f7f875da6ba9d5d741e0c84bb1bd92c503c0eecd0ee6409", size = 575585 },
{ url = "https://files.pythonhosted.org/packages/e3/3c/5f997ce34c0d1b744e0f0c0757113bdfc173a2e3dadda92c751685cfcbd1/tree_sitter-0.24.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:464fa5b2cac63608915a9de8a6efd67a4da1929e603ea86abaeae2cb1fe89921", size = 578203 },
{ url = "https://files.pythonhosted.org/packages/d5/1f/f2bc7fa7c3081653ea4f2639e06ff0af4616c47105dbcc0746137da7620d/tree_sitter-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:3b1f3cbd9700e1fba0be2e7d801527e37c49fc02dc140714669144ef6ab58dce", size = 120147 },
{ url = "https://files.pythonhosted.org/packages/c0/4c/9add771772c4d72a328e656367ca948e389432548696a3819b69cdd6f41e/tree_sitter-0.24.0-cp311-cp311-win_arm64.whl", hash = "sha256:f3f08a2ca9f600b3758792ba2406971665ffbad810847398d180c48cee174ee2", size = 108302 },
{ url = "https://files.pythonhosted.org/packages/e9/57/3a590f287b5aa60c07d5545953912be3d252481bf5e178f750db75572bff/tree_sitter-0.24.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:14beeff5f11e223c37be7d5d119819880601a80d0399abe8c738ae2288804afc", size = 140788 },
{ url = "https://files.pythonhosted.org/packages/61/0b/fc289e0cba7dbe77c6655a4dd949cd23c663fd62a8b4d8f02f97e28d7fe5/tree_sitter-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26a5b130f70d5925d67b47db314da209063664585a2fd36fa69e0717738efaf4", size = 133945 },
{ url = "https://files.pythonhosted.org/packages/86/d7/80767238308a137e0b5b5c947aa243e3c1e3e430e6d0d5ae94b9a9ffd1a2/tree_sitter-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fc5c3c26d83c9d0ecb4fc4304fba35f034b7761d35286b936c1db1217558b4e", size = 564819 },
{ url = "https://files.pythonhosted.org/packages/bf/b3/6c5574f4b937b836601f5fb556b24804b0a6341f2eb42f40c0e6464339f4/tree_sitter-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:772e1bd8c0931c866b848d0369b32218ac97c24b04790ec4b0e409901945dd8e", size = 579303 },
{ url = "https://files.pythonhosted.org/packages/0a/f4/bd0ddf9abe242ea67cca18a64810f8af230fc1ea74b28bb702e838ccd874/tree_sitter-0.24.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:24a8dd03b0d6b8812425f3b84d2f4763322684e38baf74e5bb766128b5633dc7", size = 581054 },
{ url = "https://files.pythonhosted.org/packages/8c/1c/ff23fa4931b6ef1bbeac461b904ca7e49eaec7e7e5398584e3eef836ec96/tree_sitter-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9e8b1605ab60ed43803100f067eed71b0b0e6c1fb9860a262727dbfbbb74751", size = 120221 },
{ url = "https://files.pythonhosted.org/packages/b2/2a/9979c626f303177b7612a802237d0533155bf1e425ff6f73cc40f25453e2/tree_sitter-0.24.0-cp312-cp312-win_arm64.whl", hash = "sha256:f733a83d8355fc95561582b66bbea92ffd365c5d7a665bc9ebd25e049c2b2abb", size = 108234 },
{ url = "https://files.pythonhosted.org/packages/61/cd/2348339c85803330ce38cee1c6cbbfa78a656b34ff58606ebaf5c9e83bd0/tree_sitter-0.24.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d4a6416ed421c4210f0ca405a4834d5ccfbb8ad6692d4d74f7773ef68f92071", size = 140781 },
{ url = "https://files.pythonhosted.org/packages/8b/a3/1ea9d8b64e8dcfcc0051028a9c84a630301290995cd6e947bf88267ef7b1/tree_sitter-0.24.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e0992d483677e71d5c5d37f30dfb2e3afec2f932a9c53eec4fca13869b788c6c", size = 133928 },
{ url = "https://files.pythonhosted.org/packages/fe/ae/55c1055609c9428a4aedf4b164400ab9adb0b1bf1538b51f4b3748a6c983/tree_sitter-0.24.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57277a12fbcefb1c8b206186068d456c600dbfbc3fd6c76968ee22614c5cd5ad", size = 564497 },
{ url = "https://files.pythonhosted.org/packages/ce/d0/f2ffcd04882c5aa28d205a787353130cbf84b2b8a977fd211bdc3b399ae3/tree_sitter-0.24.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d25fa22766d63f73716c6fec1a31ee5cf904aa429484256bd5fdf5259051ed74", size = 578917 },
{ url = "https://files.pythonhosted.org/packages/af/82/aebe78ea23a2b3a79324993d4915f3093ad1af43d7c2208ee90be9273273/tree_sitter-0.24.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7d5d9537507e1c8c5fa9935b34f320bfec4114d675e028f3ad94f11cf9db37b9", size = 581148 },
{ url = "https://files.pythonhosted.org/packages/a1/b4/6b0291a590c2b0417cfdb64ccb8ea242f270a46ed429c641fbc2bfab77e0/tree_sitter-0.24.0-cp313-cp313-win_amd64.whl", hash = "sha256:f58bb4956917715ec4d5a28681829a8dad5c342cafd4aea269f9132a83ca9b34", size = 120207 },
{ url = "https://files.pythonhosted.org/packages/a8/18/542fd844b75272630229c9939b03f7db232c71a9d82aadc59c596319ea6a/tree_sitter-0.24.0-cp313-cp313-win_arm64.whl", hash = "sha256:23641bd25dcd4bb0b6fa91b8fb3f46cc9f1c9f475efe4d536d3f1f688d1b84c8", size = 108232 },
]
[[package]]
name = "tree-sitter-python"
version = "0.23.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1c/30/6766433b31be476fda6569a3a374c2220e45ffee0bff75460038a57bf23b/tree_sitter_python-0.23.6.tar.gz", hash = "sha256:354bfa0a2f9217431764a631516f85173e9711af2c13dbd796a8815acfe505d9", size = 155868 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ab/67/577a02acae5f776007c924ca86ef14c19c12e71de0aa9d2a036f3c248e7b/tree_sitter_python-0.23.6-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:28fbec8f74eeb2b30292d97715e60fac9ccf8a8091ce19b9d93e9b580ed280fb", size = 74361 },
{ url = "https://files.pythonhosted.org/packages/d2/a6/194b3625a7245c532ad418130d63077ce6cd241152524152f533e4d6edb0/tree_sitter_python-0.23.6-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:680b710051b144fedf61c95197db0094f2245e82551bf7f0c501356333571f7a", size = 76436 },
{ url = "https://files.pythonhosted.org/packages/d0/62/1da112689d6d282920e62c40e67ab39ea56463b0e7167bfc5e81818a770e/tree_sitter_python-0.23.6-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a9dcef55507b6567207e8ee0a6b053d0688019b47ff7f26edc1764b7f4dc0a4", size = 112060 },
{ url = "https://files.pythonhosted.org/packages/5d/62/c9358584c96e38318d69b6704653684fd8467601f7b74e88aa44f4e6903f/tree_sitter_python-0.23.6-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29dacdc0cd2f64e55e61d96c6906533ebb2791972bec988450c46cce60092f5d", size = 112338 },
{ url = "https://files.pythonhosted.org/packages/1a/58/c5e61add45e34fb8ecbf057c500bae9d96ed7c9ca36edb7985da8ae45526/tree_sitter_python-0.23.6-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7e048733c36f564b379831689006801feb267d8194f9e793fbb395ef1723335d", size = 109382 },
{ url = "https://files.pythonhosted.org/packages/e9/f3/9b30893cae9b3811fe652dc6f90aaadfda12ae0b2757f5722fc7266f423c/tree_sitter_python-0.23.6-cp39-abi3-win_amd64.whl", hash = "sha256:a24027248399fb41594b696f929f9956828ae7cc85596d9f775e6c239cd0c2be", size = 75904 },
{ url = "https://files.pythonhosted.org/packages/87/cb/ce35a65f83a47b510d8a2f1eddf3bdbb0d57aabc87351c8788caf3309f76/tree_sitter_python-0.23.6-cp39-abi3-win_arm64.whl", hash = "sha256:71334371bd73d5fe080aed39fbff49ed8efb9506edebe16795b0c7567ed6a272", size = 73649 },
]
[[package]]
name = "triton"
version = "3.5.0"

View file

@ -19,6 +19,7 @@ from .api.v1.add import add
from .api.v1.delete import delete
from .api.v1.cognify import cognify
from .modules.memify import memify
from .modules.run_custom_pipeline import run_custom_pipeline
from .api.v1.update import update
from .api.v1.config.config import config
from .api.v1.datasets.datasets import datasets

View file

@ -40,6 +40,8 @@ from cognee.api.v1.users.routers import (
)
from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION
# Ensure application logging is configured for container stdout/stderr
setup_logging()
logger = get_logger()
if os.getenv("ENV", "prod") == "prod":
@ -75,6 +77,9 @@ async def lifespan(app: FastAPI):
await get_default_user()
# Emit a clear startup message for docker logs
logger.info("Backend server has started")
yield

View file

@ -1,4 +1,5 @@
import os
from pathlib import Path
from typing import Optional
from functools import lru_cache
from cognee.root_dir import get_absolute_path, ensure_absolute_path
@ -11,6 +12,9 @@ class BaseConfig(BaseSettings):
data_root_directory: str = get_absolute_path(".data_storage")
system_root_directory: str = get_absolute_path(".cognee_system")
cache_root_directory: str = get_absolute_path(".cognee_cache")
logs_root_directory: str = os.getenv(
"COGNEE_LOGS_DIR", str(os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs"))
)
monitoring_tool: object = Observer.NONE
@pydantic.model_validator(mode="after")
@ -30,6 +34,8 @@ class BaseConfig(BaseSettings):
# Require absolute paths for root directories
self.data_root_directory = ensure_absolute_path(self.data_root_directory)
self.system_root_directory = ensure_absolute_path(self.system_root_directory)
self.logs_root_directory = ensure_absolute_path(self.logs_root_directory)
# Set monitoring tool based on available keys
if self.langfuse_public_key and self.langfuse_secret_key:
self.monitoring_tool = Observer.LANGFUSE
@ -49,6 +55,7 @@ class BaseConfig(BaseSettings):
"system_root_directory": self.system_root_directory,
"monitoring_tool": self.monitoring_tool,
"cache_root_directory": self.cache_root_directory,
"logs_root_directory": self.logs_root_directory,
}

View file

@ -4,6 +4,8 @@ from typing import Union
from uuid import UUID
from cognee.base_config import get_base_config
from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
from cognee.infrastructure.databases.graph.config import get_graph_context_config
from cognee.infrastructure.databases.utils import get_or_create_dataset_database
from cognee.infrastructure.files.storage.config import file_storage_config
from cognee.modules.users.methods import get_user
@ -14,11 +16,40 @@ vector_db_config = ContextVar("vector_db_config", default=None)
graph_db_config = ContextVar("graph_db_config", default=None)
session_user = ContextVar("session_user", default=None)
vector_dbs_with_multi_user_support = ["lancedb"]
graph_dbs_with_multi_user_support = ["kuzu"]
async def set_session_user_context_variable(user):
session_user.set(user)
def multi_user_support_possible():
    """Return True when both the configured graph and vector database providers
    support per-user/per-dataset databases.

    Supported providers are those listed in the module-level
    ``graph_dbs_with_multi_user_support`` (kuzu) and
    ``vector_dbs_with_multi_user_support`` (lancedb).
    """
    # NOTE: these locals intentionally shadow the module-level ContextVars of the
    # same names; here they hold plain config dicts resolved for the current context.
    graph_db_config = get_graph_context_config()
    vector_db_config = get_vectordb_context_config()

    return (
        graph_db_config["graph_database_provider"] in graph_dbs_with_multi_user_support
        and vector_db_config["vector_db_provider"] in vector_dbs_with_multi_user_support
    )
def backend_access_control_enabled():
    """Decide whether backend (per-user/per-dataset) access control is active.

    Resolution order for the ENABLE_BACKEND_ACCESS_CONTROL env var:
      - unset: enabled if and only if the configured graph/vector DBs support it;
      - "true" (case-insensitive): enabled, but raises if the configured DBs
        cannot support it;
      - any other value: disabled.

    Raises:
        EnvironmentError: when explicitly enabled but the current graph and/or
            vector database providers lack multi-user support.
    """
    backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)

    if backend_access_control is None:
        # If backend access control is not defined in environment variables,
        # enable it by default if graph and vector DBs can support it, otherwise disable it
        return multi_user_support_possible()
    elif backend_access_control.lower() == "true":
        # If enabled, ensure that the current graph and vector DBs can support it
        multi_user_support = multi_user_support_possible()

        if not multi_user_support:
            raise EnvironmentError(
                "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
            )

        return True

    return False
async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
"""
If backend access control is enabled this function will ensure all datasets have their own databases,
@ -40,7 +71,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
base_config = get_base_config()
if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
if not backend_access_control_enabled():
return
user = await get_user(user_id)

View file

@ -47,7 +47,7 @@ def create_vector_engine(
embedding_engine=embedding_engine,
)
if vector_db_provider == "pgvector":
if vector_db_provider.lower() == "pgvector":
from cognee.infrastructure.databases.relational import get_relational_config
# Get configuration for postgres database
@ -78,7 +78,7 @@ def create_vector_engine(
embedding_engine,
)
elif vector_db_provider == "chromadb":
elif vector_db_provider.lower() == "chromadb":
try:
import chromadb
except ImportError:
@ -94,7 +94,7 @@ def create_vector_engine(
embedding_engine=embedding_engine,
)
elif vector_db_provider == "neptune_analytics":
elif vector_db_provider.lower() == "neptune_analytics":
try:
from langchain_aws import NeptuneAnalyticsGraph
except ImportError:
@ -122,7 +122,7 @@ def create_vector_engine(
embedding_engine=embedding_engine,
)
else:
elif vector_db_provider.lower() == "lancedb":
from .lancedb.LanceDBAdapter import LanceDBAdapter
return LanceDBAdapter(
@ -130,3 +130,9 @@ def create_vector_engine(
api_key=vector_db_key,
embedding_engine=embedding_engine,
)
else:
raise EnvironmentError(
f"Unsupported vector database provider: {vector_db_provider}. "
f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}"
)

View file

@ -1,4 +1,4 @@
from pydantic import BaseModel
from pydantic import BaseModel, field_validator
from typing import Optional, Any, Dict
@ -18,9 +18,21 @@ class Edge(BaseModel):
# Mixed usage
has_items: (Edge(weight=0.5, weights={"confidence": 0.9}), list[Item])
# With edge_text for rich embedding representation
contains: (Edge(relationship_type="contains", edge_text="relationship_name: contains; entity_description: Alice"), Entity)
"""
weight: Optional[float] = None
weights: Optional[Dict[str, float]] = None
relationship_type: Optional[str] = None
properties: Optional[Dict[str, Any]] = None
edge_text: Optional[str] = None
@field_validator("edge_text", mode="before")
@classmethod
def ensure_edge_text(cls, v, info):
    """Fall back to relationship_type when edge_text was not supplied."""
    # An explicit value always wins; only None triggers the fallback.
    if v is not None:
        return v
    fallback = info.data.get("relationship_type")
    return fallback if fallback else v

View file

@ -1,6 +1,6 @@
import io
import os.path
from typing import BinaryIO, TypedDict
from typing import BinaryIO, TypedDict, Optional
from pathlib import Path
from cognee.shared.logging_utils import get_logger
@ -27,7 +27,7 @@ class FileMetadata(TypedDict):
file_size: int
async def get_file_metadata(file: BinaryIO) -> FileMetadata:
async def get_file_metadata(file: BinaryIO, name: Optional[str] = None) -> FileMetadata:
"""
Retrieve metadata from a file object.
@ -53,7 +53,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
except io.UnsupportedOperation as error:
logger.error(f"Error retrieving content hash for file: {file.name} \n{str(error)}\n\n")
file_type = guess_file_type(file)
file_type = guess_file_type(file, name)
file_path = getattr(file, "name", None) or getattr(file, "full_name", None)

View file

@ -1,6 +1,9 @@
from typing import BinaryIO
import io
from pathlib import Path
from typing import BinaryIO, Optional, Any
import filetype
from .is_text_content import is_text_content
from tempfile import SpooledTemporaryFile
from filetype.types.base import Type
class FileTypeException(Exception):
@ -22,90 +25,7 @@ class FileTypeException(Exception):
self.message = message
class TxtFileType(filetype.Type):
"""
Represents a text file type with specific MIME and extension properties.
Public methods:
- match: Determines whether a given buffer matches the text file type.
"""
MIME = "text/plain"
EXTENSION = "txt"
def __init__(self):
super(TxtFileType, self).__init__(mime=TxtFileType.MIME, extension=TxtFileType.EXTENSION)
def match(self, buf):
"""
Determine if the given buffer contains text content.
Parameters:
-----------
- buf: The buffer to check for text content.
Returns:
--------
Returns True if the buffer is identified as text content, otherwise False.
"""
return is_text_content(buf)
txt_file_type = TxtFileType()
filetype.add_type(txt_file_type)
class CustomPdfMatcher(filetype.Type):
"""
Match PDF file types based on MIME type and extension.
Public methods:
- match
Instance variables:
- MIME: The MIME type of the PDF.
- EXTENSION: The file extension of the PDF.
"""
MIME = "application/pdf"
EXTENSION = "pdf"
def __init__(self):
super(CustomPdfMatcher, self).__init__(
mime=CustomPdfMatcher.MIME, extension=CustomPdfMatcher.EXTENSION
)
def match(self, buf):
"""
Determine if the provided buffer is a PDF file.
This method checks for the presence of the PDF signature in the buffer.
Raises:
- TypeError: If the buffer is not of bytes type.
Parameters:
-----------
- buf: The buffer containing the data to be checked.
Returns:
--------
Returns True if the buffer contains a PDF signature, otherwise returns False.
"""
return b"PDF-" in buf
custom_pdf_matcher = CustomPdfMatcher()
filetype.add_type(custom_pdf_matcher)
def guess_file_type(file: BinaryIO) -> filetype.Type:
def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type:
"""
Guess the file type from the given binary file stream.
@ -122,12 +42,23 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
- filetype.Type: The guessed file type, represented as filetype.Type.
"""
# Note: If file has .txt or .text extension, consider it a plain text file as filetype.guess may not detect it properly
# as it contains no magic number encoding
ext = None
if isinstance(file, str):
ext = Path(file).suffix
elif name is not None:
ext = Path(name).suffix
if ext in [".txt", ".text"]:
file_type = Type("text/plain", "txt")
return file_type
file_type = filetype.guess(file)
# If file type could not be determined consider it a plain text file as they don't have magic number encoding
if file_type is None:
from filetype.types.base import Type
file_type = Type("text/plain", "txt")
if file_type is None:

View file

@ -1,15 +1,13 @@
For the purposes of identifying timestamps in a query, you are tasked with extracting relevant timestamps from the query.
## Timestamp requirements
- If the query contains interval extrack both starts_at and ends_at properties
- If the query contains an instantaneous timestamp, starts_at and ends_at should be the same
- If the query its open-ended (before 2009 or after 2009), the corresponding non defined end of the time should be none
-For example: "before 2009" -- starts_at: None, ends_at: 2009 or "after 2009" -- starts_at: 2009, ends_at: None
- Put always the data that comes first in time as starts_at and the timestamps that comes second in time as ends_at
- If starts_at or ends_at cannot be extracted both of them has to be None
## Output Format
Your reply should be a JSON: list of dictionaries with the following structure:
```python
class QueryInterval(BaseModel):
starts_at: Optional[Timestamp] = None
ends_at: Optional[Timestamp] = None
```
You are tasked with identifying relevant time periods where the answer to a given query should be searched.
Current date is: `{{ time_now }}`. Determine relevant period(s) and return structured intervals.
Extraction rules:
1. Query without specific timestamp: use the time period with starts_at set to None and ends_at set to now.
2. Explicit time intervals: If the query specifies a range (e.g., from 2010 to 2020, between January and March 2023), extract both start and end dates. Always assign the earlier date to starts_at and the later date to ends_at.
3. Single timestamp: If the query refers to one specific moment (e.g., in 2015, on March 5, 2022), set starts_at and ends_at to that same timestamp.
4. Open-ended time references: For phrases such as "before X" or "after X", represent the unspecified side as None. For example: before 2009 → starts_at: None, ends_at: 2009; after 2009 → starts_at: 2009, ends_at: None.
5. Current-time references ("now", "current", "today"): If the query explicitly refers to the present, set both starts_at and ends_at to now (the ingestion timestamp).
6. "Who is" and "Who was" questions: These imply a general identity or biographical inquiry without a specific temporal scope. Set both starts_at and ends_at to None.
7. Ordering rule: Always ensure the earlier date is assigned to starts_at and the later date to ends_at.
8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.

View file

@ -1,6 +1,7 @@
import filetype
from typing import Dict, List, Optional, Any
from .LoaderInterface import LoaderInterface
from cognee.infrastructure.files.utils.guess_file_type import guess_file_type
from cognee.shared.logging_utils import get_logger
logger = get_logger(__name__)
@ -80,7 +81,7 @@ class LoaderEngine:
"""
from pathlib import Path
file_info = filetype.guess(file_path)
file_info = guess_file_type(file_path)
path_extension = Path(file_path).suffix.lstrip(".")

View file

@ -42,6 +42,7 @@ class AudioLoader(LoaderInterface):
"audio/wav",
"audio/amr",
"audio/aiff",
"audio/x-wav",
]
@property

View file

@ -0,0 +1,55 @@
from typing import Optional, List
from cognee import memify
from cognee.context_global_variables import (
set_database_global_context_variables,
set_session_user_context_variable,
)
from cognee.exceptions import CogneeValidationError
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.shared.logging_utils import get_logger
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.models import User
from cognee.tasks.memify import extract_user_sessions, cognify_session
logger = get_logger("persist_sessions_in_knowledge_graph")
async def persist_sessions_in_knowledge_graph_pipeline(
    user: User,
    session_ids: Optional[List[str]] = None,
    dataset: str = "main_dataset",
    run_in_background: bool = False,
):
    """Extract user sessions and persist them into the dataset's knowledge graph.

    Runs a memify pipeline: ``extract_user_sessions`` pulls the sessions
    (optionally restricted to ``session_ids``), then ``cognify_session``
    enriches the graph with them.

    Args:
        user: The acting user; must have write access to ``dataset``.
        session_ids: Optional list of session ids to restrict extraction to;
            when None, extraction is not filtered here.
        dataset: Name of the target dataset (default "main_dataset").
        run_in_background: Forwarded to memify; when True the pipeline runs
            asynchronously.

    Raises:
        CogneeValidationError: if ``user`` lacks write access to ``dataset``.

    Returns:
        The result of the memify pipeline run.
    """
    # Bind the acting user to the session context before authorization checks.
    await set_session_user_context_variable(user)

    dataset_to_write = await get_authorized_existing_datasets(
        user=user, datasets=[dataset], permission_type="write"
    )

    if not dataset_to_write:
        raise CogneeValidationError(
            message=f"User (id: {str(user.id)}) does not have write access to dataset: {dataset}",
            log=False,
        )

    # Point database context at the dataset owner's databases (needed when
    # per-dataset backend access control is enabled).
    await set_database_global_context_variables(
        dataset_to_write[0].id, dataset_to_write[0].owner_id
    )

    extraction_tasks = [Task(extract_user_sessions, session_ids=session_ids)]

    enrichment_tasks = [
        Task(cognify_session),
    ]

    result = await memify(
        extraction_tasks=extraction_tasks,
        enrichment_tasks=enrichment_tasks,
        dataset=dataset_to_write[0].id,
        data=[{}],
        run_in_background=run_in_background,
    )

    logger.info("Session persistence pipeline completed")

    return result

View file

@ -1,6 +1,7 @@
from typing import List, Union
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.engine.models.Edge import Edge
from cognee.modules.data.processing.document_types import Document
from cognee.modules.engine.models import Entity
from cognee.tasks.temporal_graph.models import Event
@ -31,6 +32,6 @@ class DocumentChunk(DataPoint):
chunk_index: int
cut_type: str
is_part_of: Document
contains: List[Union[Entity, Event]] = None
contains: List[Union[Entity, Event, tuple[Edge, Entity]]] = None
metadata: dict = {"index_fields": ["text"]}

View file

@ -171,8 +171,10 @@ class CogneeGraph(CogneeAbstractGraph):
embedding_map = {result.payload["text"]: result.score for result in edge_distances}
for edge in self.edges:
relationship_type = edge.attributes.get("relationship_type")
distance = embedding_map.get(relationship_type, None)
edge_key = edge.attributes.get("edge_text") or edge.attributes.get(
"relationship_type"
)
distance = embedding_map.get(edge_key, None)
if distance is not None:
edge.attributes["vector_distance"] = distance

View file

@ -1,5 +1,6 @@
from typing import Optional
from cognee.infrastructure.engine.models.Edge import Edge
from cognee.modules.chunking.models import DocumentChunk
from cognee.modules.engine.models import Entity, EntityType
from cognee.modules.engine.utils import (
@ -243,10 +244,26 @@ def _process_graph_nodes(
ontology_relationships,
)
# Add entity to data chunk
if data_chunk.contains is None:
data_chunk.contains = []
data_chunk.contains.append(entity_node)
edge_text = "; ".join(
[
"relationship_name: contains",
f"entity_name: {entity_node.name}",
f"entity_description: {entity_node.description}",
]
)
data_chunk.contains.append(
(
Edge(
relationship_type="contains",
edge_text=edge_text,
),
entity_node,
)
)
def _process_graph_edges(

View file

@ -1,71 +1,70 @@
import string
from typing import List
from collections import Counter
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
def _get_top_n_frequent_words(
text: str, stop_words: set = None, top_n: int = 3, separator: str = ", "
) -> str:
"""Concatenates the top N frequent words in text."""
if stop_words is None:
stop_words = DEFAULT_STOP_WORDS
words = [word.lower().strip(string.punctuation) for word in text.split()]
words = [word for word in words if word and word not in stop_words]
top_words = [word for word, freq in Counter(words).most_common(top_n)]
return separator.join(top_words)
def _create_title_from_text(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
"""Creates a title by combining first words with most frequent words from the text."""
first_words = text.split()[:first_n_words]
top_words = _get_top_n_frequent_words(text, top_n=top_n_words)
return f"{' '.join(first_words)}... [{top_words}]"
def _extract_nodes_from_edges(retrieved_edges: List[Edge]) -> dict:
"""Creates a dictionary of nodes with their names and content."""
nodes = {}
for edge in retrieved_edges:
for node in (edge.node1, edge.node2):
if node.id in nodes:
continue
text = node.attributes.get("text")
if text:
name = _create_title_from_text(text)
content = text
else:
name = node.attributes.get("name", "Unnamed Node")
content = node.attributes.get("description", name)
nodes[node.id] = {"node": node, "name": name, "content": content}
return nodes
async def resolve_edges_to_text(retrieved_edges: List[Edge]) -> str:
"""
Converts retrieved graph edges into a human-readable string format.
"""Converts retrieved graph edges into a human-readable string format."""
nodes = _extract_nodes_from_edges(retrieved_edges)
Parameters:
-----------
- retrieved_edges (list): A list of edges retrieved from the graph.
Returns:
--------
- str: A formatted string representation of the nodes and their connections.
"""
def _get_nodes(retrieved_edges: List[Edge]) -> dict:
def _get_title(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
def _top_n_words(text, stop_words=None, top_n=3, separator=", "):
"""Concatenates the top N frequent words in text."""
if stop_words is None:
from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
stop_words = DEFAULT_STOP_WORDS
import string
words = [word.lower().strip(string.punctuation) for word in text.split()]
if stop_words:
words = [word for word in words if word and word not in stop_words]
from collections import Counter
top_words = [word for word, freq in Counter(words).most_common(top_n)]
return separator.join(top_words)
"""Creates a title, by combining first words with most frequent words from the text."""
first_words = text.split()[:first_n_words]
top_words = _top_n_words(text, top_n=first_n_words)
return f"{' '.join(first_words)}... [{top_words}]"
"""Creates a dictionary of nodes with their names and content."""
nodes = {}
for edge in retrieved_edges:
for node in (edge.node1, edge.node2):
if node.id not in nodes:
text = node.attributes.get("text")
if text:
name = _get_title(text)
content = text
else:
name = node.attributes.get("name", "Unnamed Node")
content = node.attributes.get("description", name)
nodes[node.id] = {"node": node, "name": name, "content": content}
return nodes
nodes = _get_nodes(retrieved_edges)
node_section = "\n".join(
f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n"
for info in nodes.values()
)
connection_section = "\n".join(
f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}"
for edge in retrieved_edges
)
connections = []
for edge in retrieved_edges:
source_name = nodes[edge.node1.id]["name"]
target_name = nodes[edge.node2.id]["name"]
edge_label = edge.attributes.get("edge_text") or edge.attributes.get("relationship_type")
connections.append(f"{source_name} --[{edge_label}]--> {target_name}")
connection_section = "\n".join(connections)
return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}"

View file

@ -30,7 +30,7 @@ class BinaryData(IngestionData):
async def ensure_metadata(self):
if self.metadata is None:
self.metadata = await get_file_metadata(self.data)
self.metadata = await get_file_metadata(self.data, name=self.name)
if self.metadata["name"] is None:
self.metadata["name"] = self.name

View file

@ -1,7 +1,7 @@
import os
import asyncio
from typing import Any, Optional, List, Type
from datetime import datetime
from operator import itemgetter
from cognee.infrastructure.databases.vector import get_vector_engine
@ -79,7 +79,11 @@ class TemporalRetriever(GraphCompletionRetriever):
else:
base_directory = None
system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
time_now = datetime.now().strftime("%d-%m-%Y")
system_prompt = render_prompt(
prompt_path, {"time_now": time_now}, base_directory=base_directory
)
interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval)
@ -108,8 +112,6 @@ class TemporalRetriever(GraphCompletionRetriever):
graph_engine = await get_graph_engine()
triplets = []
if time_from and time_to:
ids = await graph_engine.collect_time_ids(time_from=time_from, time_to=time_to)
elif time_from:

View file

@ -71,7 +71,7 @@ async def get_memory_fragment(
await memory_fragment.project_graph_from_db(
graph_engine,
node_properties_to_project=properties_to_project,
edge_properties_to_project=["relationship_name"],
edge_properties_to_project=["relationship_name", "edge_text"],
node_type=node_type,
node_name=node_name,
)

View file

@ -0,0 +1 @@
from .run_custom_pipeline import run_custom_pipeline

View file

@ -0,0 +1,69 @@
from typing import Union, Optional, List, Type, Any
from uuid import UUID
from cognee.shared.logging_utils import get_logger
from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.models import User
from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
logger = get_logger()
async def run_custom_pipeline(
    tasks: Union[List[Task], List[str]] = None,
    data: Any = None,
    dataset: Union[str, UUID] = "main_dataset",
    user: User = None,
    vector_db_config: Optional[dict] = None,
    graph_db_config: Optional[dict] = None,
    data_per_batch: int = 20,
    run_in_background: bool = False,
    pipeline_name: str = "custom_pipeline",
):
    """
    Run a custom pipeline in Cognee; can work with already built graphs.

    Provided tasks and data are arranged into a Cognee pipeline run that
    executes graph enrichment/creation. Data provided here is forwarded to the
    first extraction task in the pipeline as input.

    Args:
        tasks: List of Cognee Tasks to execute. Must contain at least one task.
        data: The data to ingest. Can be anything when custom extraction and
            enrichment tasks are used.
        dataset: Dataset name or dataset uuid to process.
        user: User context for authentication and data access. Uses default if None.
        vector_db_config: Custom vector database configuration for embeddings storage.
        graph_db_config: Custom graph database configuration for relationship storage.
        data_per_batch: Number of data items to be processed in parallel.
        run_in_background: If True, starts processing asynchronously and returns
            immediately. If False, waits for completion before returning.
            Background mode recommended for large datasets (>100MB).
            Use pipeline_run_id from return value to monitor progress.
        pipeline_name: Name recorded for this pipeline run.

    Raises:
        ValueError: If no tasks are provided.
    """
    # Fail fast with a clear message instead of the TypeError that unpacking
    # the default `tasks=None` would otherwise raise.
    if not tasks:
        raise ValueError("run_custom_pipeline requires at least one task in `tasks`.")

    custom_tasks = list(tasks)

    # get_pipeline_executor returns either a background (fire-and-forget) runner
    # or a blocking runner, depending on run_in_background.
    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)

    # Execute run_pipeline via the chosen executor.
    return await pipeline_executor_func(
        pipeline=run_pipeline,
        tasks=custom_tasks,
        user=user,
        data=data,
        datasets=dataset,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        incremental_loading=False,
        data_per_batch=data_per_batch,
        pipeline_name=pipeline_name,
    )

View file

@ -1,4 +1,3 @@
import os
import json
import asyncio
from uuid import UUID
@ -9,6 +8,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.shared.logging_utils import get_logger
from cognee.shared.utils import send_telemetry
from cognee.context_global_variables import set_database_global_context_variables
from cognee.context_global_variables import backend_access_control_enabled
from cognee.modules.engine.models.node_set import NodeSet
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
@ -74,7 +74,7 @@ async def search(
)
# Use search function filtered by permissions if access control is enabled
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
if backend_access_control_enabled():
search_results = await authorized_search(
query_type=query_type,
query_text=query_text,
@ -156,7 +156,7 @@ async def search(
)
else:
# This is for maintaining backwards compatibility
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
if backend_access_control_enabled():
return_value = []
for search_result in search_results:
prepared_search_results = await prepare_search_result(search_result)

View file

@ -5,6 +5,7 @@ from ..models import User
from ..get_fastapi_users import get_fastapi_users
from .get_default_user import get_default_user
from cognee.shared.logging_utils import get_logger
from cognee.context_global_variables import backend_access_control_enabled
logger = get_logger("get_authenticated_user")
@ -12,7 +13,7 @@ logger = get_logger("get_authenticated_user")
# Check environment variable to determine authentication requirement
REQUIRE_AUTHENTICATION = (
os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true"
or os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true"
or backend_access_control_enabled()
)
fastapi_users = get_fastapi_users()

View file

@ -10,7 +10,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.methods.create_default_user import create_default_user
async def get_default_user() -> SimpleNamespace:
async def get_default_user() -> User:
db_engine = get_relational_engine()
base_config = get_base_config()
default_email = base_config.default_user_email or "default_user@example.com"

View file

@ -16,17 +16,17 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
nodes_list = []
color_map = {
"Entity": "#f47710",
"EntityType": "#6510f4",
"DocumentChunk": "#801212",
"TextSummary": "#1077f4",
"TableRow": "#f47710",
"TableType": "#6510f4",
"ColumnValue": "#13613a",
"SchemaTable": "#f47710",
"DatabaseSchema": "#6510f4",
"SchemaRelationship": "#13613a",
"default": "#D3D3D3",
"Entity": "#5C10F4",
"EntityType": "#A550FF",
"DocumentChunk": "#0DFF00",
"TextSummary": "#5C10F4",
"TableRow": "#A550FF",
"TableType": "#5C10F4",
"ColumnValue": "#757470",
"SchemaTable": "#A550FF",
"DatabaseSchema": "#5C10F4",
"SchemaRelationship": "#323332",
"default": "#D8D8D8",
}
for node_id, node_info in nodes_data:
@ -98,16 +98,19 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
<head>
<meta charset="utf-8">
<script src="https://d3js.org/d3.v5.min.js"></script>
<script src="https://d3js.org/d3-contour.v1.min.js"></script>
<style>
body, html { margin: 0; padding: 0; width: 100%; height: 100%; overflow: hidden; background: linear-gradient(90deg, #101010, #1a1a2e); color: white; font-family: 'Inter', sans-serif; }
svg { width: 100vw; height: 100vh; display: block; }
.links line { stroke: rgba(255, 255, 255, 0.4); stroke-width: 2px; }
.links line.weighted { stroke: rgba(255, 215, 0, 0.7); }
.links line.multi-weighted { stroke: rgba(0, 255, 127, 0.8); }
.nodes circle { stroke: white; stroke-width: 0.5px; filter: drop-shadow(0 0 5px rgba(255,255,255,0.3)); }
.node-label { font-size: 5px; font-weight: bold; fill: white; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
.edge-label { font-size: 3px; fill: rgba(255, 255, 255, 0.7); text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
.links line { stroke: rgba(160, 160, 160, 0.25); stroke-width: 1.5px; stroke-linecap: round; }
.links line.weighted { stroke: rgba(255, 215, 0, 0.4); }
.links line.multi-weighted { stroke: rgba(0, 255, 127, 0.45); }
.nodes circle { stroke: white; stroke-width: 0.5px; }
.node-label { font-size: 5px; font-weight: bold; fill: #F4F4F4; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
.edge-label { font-size: 3px; fill: #F4F4F4; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; paint-order: stroke; stroke: rgba(50,51,50,0.75); stroke-width: 1px; }
.density path { mix-blend-mode: screen; }
.tooltip {
position: absolute;
@ -125,11 +128,32 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
max-width: 300px;
word-wrap: break-word;
}
#info-panel {
position: fixed;
left: 12px;
top: 12px;
width: 340px;
max-height: calc(100vh - 24px);
overflow: auto;
background: rgba(50, 51, 50, 0.7);
backdrop-filter: blur(6px);
border: 1px solid rgba(216, 216, 216, 0.35);
border-radius: 8px;
color: #F4F4F4;
padding: 12px 14px;
z-index: 1100;
}
#info-panel h3 { margin: 0 0 8px 0; font-size: 14px; color: #F4F4F4; }
#info-panel .kv { font-size: 12px; line-height: 1.4; }
#info-panel .kv .k { color: #D8D8D8; }
#info-panel .kv .v { color: #F4F4F4; }
#info-panel .placeholder { opacity: 0.7; font-size: 12px; }
</style>
</head>
<body>
<svg></svg>
<div class="tooltip" id="tooltip"></div>
<div id="info-panel"><div class="placeholder">Hover a node or edge to inspect details</div></div>
<script>
var nodes = {nodes};
var links = {links};
@ -140,19 +164,141 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
var container = svg.append("g");
var tooltip = d3.select("#tooltip");
var infoPanel = d3.select('#info-panel');
// Render a titled key/value list into the left-hand #info-panel.
// `entries` is an array of {k, v} pairs; all text is HTML-escaped before insertion.
function renderInfo(title, entries){
// Minimal HTML escape for the three characters that matter in element content.
function esc(s){ return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }
var html = '<h3>' + esc(title) + '</h3>';
html += '<div class="kv">';
entries.forEach(function(e){
html += '<div><span class="k">' + esc(e.k) + ':</span> <span class="v">' + esc(e.v) + '</span></div>';
});
html += '</div>';
// infoPanel is the d3 selection of #info-panel defined above.
infoPanel.html(html);
}
// Return the first non-blank string among common descriptive fields, else null.
function pickDescription(obj){
    if (!obj) return null;
    var found = null;
    ['description', 'summary', 'text', 'content'].some(function(key){
        var val = obj[key];
        if (typeof val === 'string' && val.trim()){
            found = val.trim();
            return true; // stop scanning on first hit
        }
        return false;
    });
    return found;
}
// Truncate a string to at most n characters, appending an ellipsis when
// content was cut (the original concatenated an empty string — the '…'
// character was evidently lost, leaving truncation invisible to the user).
// Falsy inputs (null/undefined/'') are returned unchanged.
function truncate(s, n) {
    if (!s) return s;
    return s.length > n ? (s.slice(0, n) + '\u2026') : s;
}
function renderNodeInfo(n) {
    // Build the info-panel rows for a hovered node: identity fields first,
    // then a normalized description, then up to 12 scalar properties.
    var rows = [];
    var pushRow = function (label, value) { rows.push({k: label, v: value}); };
    if (n.name) pushRow('Name', n.name);
    if (n.type) pushRow('Type', n.type);
    if (n.id) pushRow('ID', n.id);
    // Prefer a description on the node itself, else on its properties bag.
    var description = pickDescription(n) || pickDescription(n.properties);
    if (description) {
        // Collapse whitespace and cap length so the panel stays compact.
        pushRow('Description', truncate(description.replace(/\s+/g, ' ').trim(), 280));
    }
    if (n.properties) {
        Object.keys(n.properties).slice(0, 12).forEach(function (key) {
            var raw = n.properties[key];
            // Skip nested objects/arrays; only show scalar values.
            if (raw !== undefined && raw !== null && typeof raw !== 'object') {
                pushRow(key, String(raw));
            }
        });
    }
    renderInfo(n.name || 'Node', rows);
}
function renderEdgeInfo(e) {
    // Build the info-panel rows for a hovered edge: relation, weights
    // (up to 8, prefixed "w."), type, and a normalized description.
    var rows = [];
    if (e.relation) rows.push({k: 'Relation', v: e.relation});
    if (e.weight !== undefined && e.weight !== null) rows.push({k: 'Weight', v: e.weight});
    if (e.all_weights && Object.keys(e.all_weights).length) {
        Object.keys(e.all_weights).slice(0, 8).forEach(function (weightKey) {
            rows.push({k: 'w.' + weightKey, v: e.all_weights[weightKey]});
        });
    }
    if (e.relationship_type) rows.push({k: 'Type', v: e.relationship_type});
    var description = pickDescription(e.edge_info);
    if (description) {
        rows.push({k: 'Description', v: truncate(description.replace(/\s+/g, ' ').trim(), 280)});
    }
    renderInfo('Edge', rows);
}
// Basic runtime diagnostics
console.log('[Cognee Visualization] nodes:', nodes ? nodes.length : 0, 'links:', links ? links.length : 0);
window.addEventListener('error', function(e){
try {
tooltip.html('<strong>Error:</strong> ' + e.message)
.style('left', '12px')
.style('top', '12px')
.style('opacity', 1);
} catch(_) {}
});
// Normalize node IDs and link endpoints for robustness
function resolveId(d){ return (d && (d.id || d.node_id || d.uuid || d.external_id || d.name)) || undefined; }
if (Array.isArray(nodes)) {
nodes.forEach(function(n){ var id = resolveId(n); if (id !== undefined) n.id = id; });
}
if (Array.isArray(links)) {
links.forEach(function(l){
if (typeof l.source === 'object') l.source = resolveId(l.source);
if (typeof l.target === 'object') l.target = resolveId(l.target);
});
}
if (!nodes || nodes.length === 0) {
container.append('text')
.attr('x', width / 2)
.attr('y', height / 2)
.attr('fill', '#fff')
.attr('font-size', 14)
.attr('text-anchor', 'middle')
.text('No graph data available');
}
// Visual defs - reusable glow
var defs = svg.append("defs");
var glow = defs.append("filter").attr("id", "glow")
.attr("x", "-30%")
.attr("y", "-30%")
.attr("width", "160%")
.attr("height", "160%");
glow.append("feGaussianBlur").attr("stdDeviation", 8).attr("result", "coloredBlur");
var feMerge = glow.append("feMerge");
feMerge.append("feMergeNode").attr("in", "coloredBlur");
feMerge.append("feMergeNode").attr("in", "SourceGraphic");
// Stronger glow for hovered adjacency
var glowStrong = defs.append("filter").attr("id", "glow-strong")
.attr("x", "-40%")
.attr("y", "-40%")
.attr("width", "180%")
.attr("height", "180%");
glowStrong.append("feGaussianBlur").attr("stdDeviation", 14).attr("result", "coloredBlur");
var feMerge2 = glowStrong.append("feMerge");
feMerge2.append("feMergeNode").attr("in", "coloredBlur");
feMerge2.append("feMergeNode").attr("in", "SourceGraphic");
var currentTransform = d3.zoomIdentity;
var densityZoomTimer = null;
var isInteracting = false;
var labelBaseSize = 10;
// Resolve the clustering key used for density grouping: the first truthy of
// type/category/group/color, falling back to 'default' (also for null nodes).
function getGroupKey(d) {
    if (d) {
        var groupKey = d.type || d.category || d.group || d.color;
        if (groupKey) return groupKey;
    }
    return 'default';
}
var simulation = d3.forceSimulation(nodes)
.force("link", d3.forceLink(links).id(d => d.id).strength(0.1))
.force("charge", d3.forceManyBody().strength(-275))
.force("link", d3.forceLink(links).id(function(d){ return d.id; }).distance(100).strength(0.2))
.force("charge", d3.forceManyBody().strength(-180))
.force("collide", d3.forceCollide().radius(16).iterations(2))
.force("center", d3.forceCenter(width / 2, height / 2))
.force("x", d3.forceX().strength(0.1).x(width / 2))
.force("y", d3.forceY().strength(0.1).y(height / 2));
.force("x", d3.forceX().strength(0.06).x(width / 2))
.force("y", d3.forceY().strength(0.06).y(height / 2))
.alphaDecay(0.06)
.velocityDecay(0.6);
// Density layer (sibling of container to avoid double transforms)
var densityLayer = svg.append("g")
.attr("class", "density")
.style("pointer-events", "none");
if (densityLayer.lower) densityLayer.lower();
var link = container.append("g")
.attr("class", "links")
.selectAll("line")
.data(links)
.enter().append("line")
.style("opacity", 0)
.style("pointer-events", "none")
.attr("stroke-width", d => {
if (d.weight) return Math.max(2, d.weight * 5);
if (d.all_weights && Object.keys(d.all_weights).length > 0) {
@ -168,6 +314,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
})
.on("mouseover", function(d) {
// Create tooltip content for edge
renderEdgeInfo(d);
var content = "<strong>Edge Information</strong><br/>";
content += "Relationship: " + d.relation + "<br/>";
@ -212,6 +359,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
.data(links)
.enter().append("text")
.attr("class", "edge-label")
.style("opacity", 0)
.text(d => {
var label = d.relation;
if (d.all_weights && Object.keys(d.all_weights).length > 1) {
@ -232,21 +380,225 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
.data(nodes)
.enter().append("g");
// Color fallback by type when d.color is missing
var colorByType = {
"Entity": "#5C10F4",
"EntityType": "#A550FF",
"DocumentChunk": "#0DFF00",
"TextSummary": "#5C10F4",
"TableRow": "#A550FF",
"TableType": "#5C10F4",
"ColumnValue": "#757470",
"SchemaTable": "#A550FF",
"DatabaseSchema": "#5C10F4",
"SchemaRelationship": "#323332"
};
var node = nodeGroup.append("circle")
.attr("r", 13)
.attr("fill", d => d.color)
.attr("fill", function(d){ return d.color || colorByType[d.type] || "#D3D3D3"; })
.style("filter", "url(#glow)")
.attr("shape-rendering", "geometricPrecision")
.call(d3.drag()
.on("start", dragstarted)
.on("drag", dragged)
.on("drag", function(d){ dragged(d); updateDensity(); showAdjacency(d); })
.on("end", dragended));
nodeGroup.append("text")
// Show links only for hovered node adjacency
// True when nodeId is either endpoint of the link. Endpoints may be raw id
// strings (before the simulation binds them) or node objects with an .id.
function isAdjacent(linkDatum, nodeId) {
    var sourceId = linkDatum && linkDatum.source && (linkDatum.source.id || linkDatum.source);
    var targetId = linkDatum && linkDatum.target && (linkDatum.target.id || linkDatum.target);
    return sourceId === nodeId || targetId === nodeId;
}
// Highlight the hovered node's neighborhood: reveal only incident links and
// their labels, dim non-neighbor nodes, enlarge/raise neighbors, and fade the
// density clouds. Mutates the shared outer selections (link, edgeLabels,
// node, nodeGroup, densityLayer) and reads the shared labelBaseSize;
// clearAdjacency() is the matching reset.
function showAdjacency(d) {
    // Accept several possible id fields on the datum; bail out when none present.
    var nodeId = d && (d.id || d.node_id || d.uuid || d.external_id || d.name);
    if (!nodeId) return;
    // Build neighbor set (keyed by id; includes the hovered node itself).
    var neighborIds = {};
    neighborIds[nodeId] = true;
    for (var i = 0; i < links.length; i++) {
        var l = links[i];
        // Endpoints may be raw ids or bound node objects — handle both.
        var sid = l && l.source && (l.source.id || l.source);
        var tid = l && l.target && (l.target.id || l.target);
        if (sid === nodeId) neighborIds[tid] = true;
        if (tid === nodeId) neighborIds[sid] = true;
    }
    // Links start hidden (opacity 0 at creation); show only incident ones.
    link
        .style("opacity", function(l){ return isAdjacent(l, nodeId) ? 0.95 : 0; })
        .style("stroke", function(l){ return isAdjacent(l, nodeId) ? "rgba(255,255,255,0.95)" : null; })
        .style("stroke-width", function(l){ return isAdjacent(l, nodeId) ? 2.5 : 1.5; });
    edgeLabels.style("opacity", function(l){ return isAdjacent(l, nodeId) ? 1 : 0; });
    // Fade the density layer so the highlighted subgraph stands out.
    densityLayer.style("opacity", 0.35);
    // Highlight neighbor nodes and dim others
    node
        .style("opacity", function(n){ return neighborIds[n.id] ? 1 : 0.25; })
        .style("filter", function(n){ return neighborIds[n.id] ? "url(#glow-strong)" : "url(#glow)"; })
        .attr("r", function(n){ return neighborIds[n.id] ? 15 : 13; });
    // Raise highlighted nodes above the dimmed ones in paint order.
    node.filter(function(n){ return neighborIds[n.id]; }).raise();
    // Neighbor labels brighter and slightly larger (capped at 22px).
    nodeGroup.select("text")
        .style("opacity", function(n){ return neighborIds[n.id] ? 1 : 0.2; })
        .style("font-size", function(n){
            var size = neighborIds[n.id] ? Math.min(22, labelBaseSize * 1.25) : labelBaseSize;
            return size + "px";
        });
}
// Restore the resting visual state after a hover ends: hide all links and
// edge labels, bring the density layer back to full opacity, and reset node
// size/glow and label opacity/size. Counterpart to showAdjacency().
function clearAdjacency() {
    link.style("opacity", 0)
        .style("stroke", null)
        .style("stroke-width", 1.5);
    edgeLabels.style("opacity", 0);
    densityLayer.style("opacity", 1);
    node
        .style("opacity", 1)
        .style("filter", "url(#glow)")
        .attr("r", 13);
    nodeGroup.select("text")
        .style("opacity", 1)
        .style("font-size", labelBaseSize + "px");
}
node.on("mouseover", function(d){ showAdjacency(d); })
.on("mouseout", function(){ clearAdjacency(); });
node.on("mouseover", function(d){ renderNodeInfo(d); tooltip.style('opacity', 0); });
// Also bind on the group so labels trigger adjacency too
nodeGroup.on("mouseover", function(d){ showAdjacency(d); })
.on("mouseout", function(){ clearAdjacency(); });
// Density always on; no hover gating
// Add labels sparsely to reduce clutter (every 14th node), and truncate long text
nodeGroup
.filter(function(d, i){ return i % 14 === 0; })
.append("text")
.attr("class", "node-label")
.attr("dy", 4)
.attr("text-anchor", "middle")
.text(d => d.name);
.text(function(d){
var s = d && d.name ? String(d.name) : '';
return s.length > 40 ? (s.slice(0, 40) + "") : s;
})
.style("font-size", labelBaseSize + "px");
node.append("title").text(d => JSON.stringify(d));
// Recompute the shared label font size from the current zoom level and apply
// it to all node labels; labels are hidden entirely when zoomed far out.
function applyLabelSize() {
    var zoomScale = (currentTransform && currentTransform.k) || 1;
    // Shrink labels as zoom increases so they stay readable without
    // dominating the view, clamped to the 7-18px range.
    labelBaseSize = Math.max(7, Math.min(18, 10 / Math.sqrt(zoomScale)));
    nodeGroup.select("text")
        .style("font-size", labelBaseSize + "px")
        .style("display", (zoomScale < 0.35 ? "none" : null));
}
// Density cloud computation (throttled)
var densityTick = 0;
var geoPath = d3.geoPath().projection(null);
var MAX_POINTS_PER_GROUP = 400;
// Redraw the per-group density clouds behind the graph. Groups nodes by
// getGroupKey(), projects positions into screen space via the current zoom
// transform, and renders d3.contourDensity() contours tinted with the
// group's node color. Throttled by callers (tick counter / zoom-end timer)
// and skipped entirely while the user is interacting. Any runtime error is
// swallowed (logged) so density rendering can never break the main view.
// NOTE(review): assumes outer `width`/`height` track the SVG viewport and
// that `densityLayer` is a sibling of the zoomed container — confirm.
function updateDensity() {
    try {
        if (isInteracting) return; // skip during interaction for smoother UX
        if (typeof d3 === 'undefined' || typeof d3.contourDensity !== 'function') {
            return; // d3-contour not available; skip gracefully
        }
        if (!nodes || nodes.length === 0) return;
        // Only nodes the simulation has positioned with finite coordinates.
        var usable = nodes.filter(function(d){ return d && typeof d.x === 'number' && isFinite(d.x) && typeof d.y === 'number' && isFinite(d.y); });
        if (usable.length < 3) return; // not enough positioned points yet
        var t = currentTransform || d3.zoomIdentity;
        if (t.k && t.k < 0.08) {
            // Skip density at extreme zoom-out to avoid numerical instability/perf issues
            densityLayer.selectAll('*').remove();
            return;
        }
        // Parse "#rgb"/"#rrggbb" into channel values; falsy input falls back
        // to the neon cyan default used for uncolored groups.
        function hexToRgb(hex){
            if (!hex) return {r: 0, g: 200, b: 255};
            var c = hex.replace('#','');
            if (c.length === 3) c = c.split('').map(function(x){ return x+x; }).join('');
            var num = parseInt(c, 16);
            return { r: (num >> 16) & 255, g: (num >> 8) & 255, b: num & 255 };
        }
        // Build groups across all nodes
        var groups = {};
        for (var i = 0; i < usable.length; i++) {
            var k = getGroupKey(usable[i]);
            if (!groups[k]) groups[k] = [];
            groups[k].push(usable[i]);
        }
        // Full redraw: clear previous contours before rebuilding.
        densityLayer.selectAll('*').remove();
        Object.keys(groups).forEach(function(key){
            var arr = groups[key];
            if (!arr || arr.length < 3) return;
            // Transform positions into screen space and sample to cap cost
            // (at most MAX_POINTS_PER_GROUP points per group).
            var arrT = [];
            var step = Math.max(1, Math.floor(arr.length / MAX_POINTS_PER_GROUP));
            for (var j = 0; j < arr.length; j += step) {
                var nx = t.applyX(arr[j].x);
                var ny = t.applyY(arr[j].y);
                if (isFinite(nx) && isFinite(ny)) {
                    arrT.push({ x: nx, y: ny, type: arr[j].type, color: arr[j].color });
                }
            }
            if (arrT.length < 3) return;
            // Compute adaptive bandwidth based on group spread:
            // average distance of sampled points from the group centroid.
            var cx = 0, cy = 0;
            for (var k = 0; k < arrT.length; k++){ cx += arrT[k].x; cy += arrT[k].y; }
            cx /= arrT.length; cy /= arrT.length;
            var sumR = 0;
            for (var k2 = 0; k2 < arrT.length; k2++){
                var dx = arrT[k2].x - cx, dy = arrT[k2].y - cy;
                sumR += Math.sqrt(dx*dx + dy*dy);
            }
            var avgR = sumR / arrT.length;
            // Clamp spread to [12, 80] px, then undo zoom scale so the cloud
            // keeps a similar on-screen footprint across zoom levels.
            var dynamicBandwidth = Math.max(12, Math.min(80, avgR));
            var densityBandwidth = dynamicBandwidth / (t.k || 1);
            var contours = d3.contourDensity()
                .x(function(d){ return d.x; })
                .y(function(d){ return d.y; })
                .size([width, height])
                .bandwidth(densityBandwidth)
                .thresholds(8)
                (arrT);
            if (!contours || contours.length === 0) return;
            var maxVal = d3.max(contours, function(d){ return d.value; }) || 1;
            // Use the first node color in the group or fallback neon palette
            var baseColor = (arr.find(function(d){ return d && d.color; }) || {}).color || '#00c8ff';
            var rgb = hexToRgb(baseColor);
            var g = densityLayer.append('g').attr('data-group', key);
            g.selectAll('path')
                .data(contours)
                .enter()
                .append('path')
                .attr('d', geoPath)
                .attr('fill', 'rgb(' + rgb.r + ',' + rgb.g + ',' + rgb.b + ')')
                .attr('stroke', 'none')
                .style('opacity', function(d){
                    // Opacity scales with normalized density, gamma-boosted.
                    var v = maxVal ? (d.value / maxVal) : 0;
                    var alpha = Math.pow(Math.max(0, Math.min(1, v)), 1.6); // accentuate clusters
                    return 0.65 * alpha; // up to 0.65 opacity at peak density
                })
                .style('filter', 'blur(2px)');
        });
    } catch (e) {
        // Reduce impact of any runtime errors during zoom
        console.warn('Density update failed:', e);
    }
}
simulation.on("tick", function() {
link.attr("x1", d => d.source.x)
@ -266,16 +618,29 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
.attr("y", d => d.y)
.attr("dy", 4)
.attr("text-anchor", "middle");
densityTick += 1;
if (densityTick % 24 === 0) updateDensity();
});
svg.call(d3.zoom().on("zoom", function() {
container.attr("transform", d3.event.transform);
}));
var zoomBehavior = d3.zoom()
.on("start", function(){ isInteracting = true; densityLayer.style("opacity", 0.2); })
.on("zoom", function(){
currentTransform = d3.event.transform;
container.attr("transform", currentTransform);
})
.on("end", function(){
if (densityZoomTimer) clearTimeout(densityZoomTimer);
densityZoomTimer = setTimeout(function(){ isInteracting = false; densityLayer.style("opacity", 1); updateDensity(); }, 140);
});
svg.call(zoomBehavior);
// d3 drag-start handler: reheat the simulation, pin the dragged node at its
// current position (fx/fy), and fade the density layer while the user
// interacts. Uses the d3 v5-style global d3.event.
function dragstarted(d) {
    if (!d3.event.active) simulation.alphaTarget(0.3).restart();
    d.fx = d.x;
    d.fy = d.y;
    isInteracting = true;
    densityLayer.style("opacity", 0.2);
}
function dragged(d) {
@ -287,6 +652,8 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
if (!d3.event.active) simulation.alphaTarget(0);
d.fx = null;
d.fy = null;
if (densityZoomTimer) clearTimeout(densityZoomTimer);
densityZoomTimer = setTimeout(function(){ isInteracting = false; densityLayer.style("opacity", 1); updateDensity(); }, 140);
}
window.addEventListener("resize", function() {
@ -295,7 +662,13 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
svg.attr("width", width).attr("height", height);
simulation.force("center", d3.forceCenter(width / 2, height / 2));
simulation.alpha(1).restart();
updateDensity();
applyLabelSize();
});
// Initial density draw
updateDensity();
applyLabelSize();
</script>
<svg style="position: fixed; bottom: 10px; right: 10px; width: 150px; height: auto; z-index: 9999;" viewBox="0 0 158 44" fill="none" xmlns="http://www.w3.org/2000/svg">
@ -305,8 +678,12 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
</html>
"""
html_content = html_template.replace("{nodes}", json.dumps(nodes_list))
html_content = html_content.replace("{links}", json.dumps(links_list))
# Safely embed JSON inside <script> by escaping </ to avoid prematurely closing the tag
def _safe_json_embed(obj):
return json.dumps(obj).replace("</", "<\\/")
html_content = html_template.replace("{nodes}", _safe_json_embed(nodes_list))
html_content = html_content.replace("{links}", _safe_json_embed(links_list))
if not destination_file_path:
home_dir = os.path.expanduser("~")

View file

@ -1,6 +1,7 @@
import os
import sys
import logging
import tempfile
import structlog
import traceback
import platform
@ -76,9 +77,38 @@ log_levels = {
# Track if structlog logging has been configured
_is_structlog_configured = False
# Path to logs directory
LOGS_DIR = Path(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "logs"))
LOGS_DIR.mkdir(exist_ok=True) # Create logs dir if it doesn't exist
def resolve_logs_dir():
    """Resolve a writable logs directory.

    Priority:
        1) BaseConfig.logs_root_directory (respects COGNEE_LOGS_DIR)
        2) <system temp dir>/cognee_logs (default, best-effort create)

    Returns:
        A Path to a writable directory, or None if none are writable/creatable.
    """
    # Imported lazily to avoid a circular import at module load time.
    from cognee.base_config import get_base_config

    base_config = get_base_config()
    logs_root_directory = Path(base_config.logs_root_directory)
    try:
        logs_root_directory.mkdir(parents=True, exist_ok=True)
        if os.access(logs_root_directory, os.W_OK):
            return logs_root_directory
    except Exception:
        # e.g. read-only filesystem — fall through to the temp-dir fallback.
        pass

    try:
        # Use the platform temp directory instead of a hard-coded "/tmp" so
        # the fallback also works on Windows (tempfile is imported at module level).
        tmp_log_path = Path(tempfile.gettempdir()) / "cognee_logs"
        tmp_log_path.mkdir(parents=True, exist_ok=True)
        if os.access(tmp_log_path, os.W_OK):
            return tmp_log_path
    except Exception:
        pass

    # No writable location found; callers skip file logging in this case.
    return None
# Maximum number of log files to keep
MAX_LOG_FILES = 10
@ -439,15 +469,18 @@ def setup_logging(log_level=None, name=None):
# can define their own levels.
root_logger.setLevel(logging.NOTSET)
# Resolve logs directory with env and safe fallbacks
logs_dir = resolve_logs_dir()
# Check if we already have a log file path from the environment
# NOTE: environment variable must be used here as it allows us to
# log to a single file with a name based on a timestamp in a multiprocess setting.
# Without it, we would have a separate log file for every process.
log_file_path = os.environ.get("LOG_FILE_NAME")
if not log_file_path:
if not log_file_path and logs_dir is not None:
# Create a new log file name with the cognee start time
start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_file_path = os.path.join(LOGS_DIR, f"{start_time}.log")
log_file_path = str((logs_dir / f"{start_time}.log").resolve())
os.environ["LOG_FILE_NAME"] = log_file_path
try:
@ -473,7 +506,8 @@ def setup_logging(log_level=None, name=None):
)
# Clean up old log files, keeping only the most recent ones
cleanup_old_logs(LOGS_DIR, MAX_LOG_FILES)
if logs_dir is not None:
cleanup_old_logs(logs_dir, MAX_LOG_FILES)
# Mark logging as configured
_is_structlog_configured = True
@ -497,6 +531,10 @@ def setup_logging(log_level=None, name=None):
# Get a configured logger and log system information
logger = structlog.get_logger(name if name else __name__)
if logs_dir is not None:
logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path)
# Detailed initialization for regular usage
logger.info(
"Logging initialized",

View file

@ -11,8 +11,10 @@ import pathlib
from uuid import uuid4, uuid5, NAMESPACE_OID
from cognee.base_config import get_base_config
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.databases.graph import get_graph_engine
logger = get_logger()
# Analytics Proxy Url, currently hosted by Vercel
proxy_url = "https://test.prometh.ai"
@ -38,16 +40,21 @@ def get_anonymous_id():
home_dir = str(pathlib.Path(pathlib.Path(__file__).parent.parent.parent.resolve()))
if not os.path.isdir(home_dir):
os.makedirs(home_dir, exist_ok=True)
anonymous_id_file = os.path.join(home_dir, ".anon_id")
if not os.path.isfile(anonymous_id_file):
anonymous_id = str(uuid4())
with open(anonymous_id_file, "w", encoding="utf-8") as f:
f.write(anonymous_id)
else:
with open(anonymous_id_file, "r", encoding="utf-8") as f:
anonymous_id = f.read()
try:
if not os.path.isdir(home_dir):
os.makedirs(home_dir, exist_ok=True)
anonymous_id_file = os.path.join(home_dir, ".anon_id")
if not os.path.isfile(anonymous_id_file):
anonymous_id = str(uuid4())
with open(anonymous_id_file, "w", encoding="utf-8") as f:
f.write(anonymous_id)
else:
with open(anonymous_id_file, "r", encoding="utf-8") as f:
anonymous_id = f.read()
except Exception as e:
# In case of read-only filesystem or other issues
logger.warning("Could not create or read anonymous id file: %s", e)
return "unknown-anonymous-id"
return anonymous_id

View file

@ -1,2 +1,4 @@
from .extract_subgraph import extract_subgraph
from .extract_subgraph_chunks import extract_subgraph_chunks
from .cognify_session import cognify_session
from .extract_user_sessions import extract_user_sessions

View file

@ -0,0 +1,40 @@
import cognee
from cognee.exceptions import CogneeValidationError, CogneeSystemError
from cognee.shared.logging_utils import get_logger
logger = get_logger("cognify_session")
async def cognify_session(data):
    """
    Process and cognify session data into the knowledge graph.

    Adds session content to cognee with a dedicated "user_sessions_from_cache"
    node set, then triggers the cognify pipeline to extract entities and
    relationships from the session data.

    Args:
        data: Session string containing Question, Context, and Answer information.

    Raises:
        CogneeValidationError: If data is None or empty.
        CogneeSystemError: If cognee operations fail.
    """
    try:
        if not data or (isinstance(data, str) and not data.strip()):
            logger.warning("Empty session data provided to cognify_session task, skipping")
            raise CogneeValidationError(message="Session data cannot be empty", log=False)

        logger.info("Processing session data for cognification")

        await cognee.add(data, node_set=["user_sessions_from_cache"])
        # Log message fixed to match the node set actually used above.
        logger.debug("Session data added to cognee with node_set: user_sessions_from_cache")

        await cognee.cognify()
        logger.info("Session data successfully cognified")

    except CogneeValidationError:
        # Propagate validation failures unchanged so callers can distinguish them.
        raise
    except Exception as e:
        logger.error(f"Error cognifying session data: {str(e)}")
        raise CogneeSystemError(message=f"Failed to cognify session data: {str(e)}", log=False)

View file

@ -0,0 +1,73 @@
from typing import Optional, List
from cognee.context_global_variables import session_user
from cognee.exceptions import CogneeSystemError
from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine
from cognee.shared.logging_utils import get_logger
from cognee.modules.users.models import User
logger = get_logger("extract_user_sessions")
async def extract_user_sessions(
    data,
    session_ids: Optional[List[str]] = None,
):
    """
    Extract Q&A sessions for the current user from cache.

    Retrieves all Q&A triplets from specified session IDs and yields them
    as formatted strings combining question, context, and answer.

    Args:
        data: Data passed from memify. If empty dict ({}), no external data is provided.
        session_ids: Optional list of specific session IDs to extract.

    Yields:
        String containing session ID and all Q&A pairs formatted.

    Raises:
        CogneeSystemError: If cache engine is unavailable or extraction fails.
    """

    def _format_session(session_id, qa_pairs):
        # Render one session as a "Session ID" header followed by its Q&A pairs.
        parts = [f"Session ID: {session_id}\n\n"]
        for qa_pair in qa_pairs:
            question = qa_pair.get("question", "")
            answer = qa_pair.get("answer", "")
            parts.append(f"Question: {question}\n\nAnswer: {answer}\n\n")
        return "".join(parts)

    try:
        if data and data != [{}]:
            # External data was supplied by memify; this extractor only reads cache.
            return

        logger.info("Fetching session metadata for current user")

        user: User = session_user.get()
        if not user:
            raise CogneeSystemError(message="No authenticated user found in context", log=False)
        user_id = str(user.id)

        cache_engine = get_cache_engine()
        if cache_engine is None:
            raise CogneeSystemError(
                message="Cache engine not available for session extraction, please enable caching in order to have sessions to save",
                log=False,
            )

        if not session_ids:
            logger.info(
                "No specific session_ids provided. Please specify which sessions to extract."
            )
            return

        for session_id in session_ids:
            try:
                qa_data = await cache_engine.get_all_qas(user_id, session_id)
                if qa_data:
                    logger.info(f"Extracted session {session_id} with {len(qa_data)} Q&A pairs")
                    yield _format_session(session_id, qa_data)
            except Exception as e:
                # Best-effort per session: log and move on to the next one.
                logger.warning(f"Failed to extract session {session_id}: {str(e)}")
                continue

    except CogneeSystemError:
        raise
    except Exception as e:
        logger.error(f"Error extracting user sessions: {str(e)}")
        raise CogneeSystemError(message=f"Failed to extract user sessions: {str(e)}", log=False)

View file

@ -8,47 +8,58 @@ logger = get_logger("index_data_points")
async def index_data_points(data_points: list[DataPoint]):
created_indexes = {}
index_points = {}
"""Index data points in the vector engine by creating embeddings for specified fields.
Process:
1. Groups data points into a nested dict: {type_name: {field_name: [points]}}
2. Creates vector indexes for each (type, field) combination on first encounter
3. Batches points per (type, field) and creates async indexing tasks
4. Executes all indexing tasks in parallel for efficient embedding generation
Args:
data_points: List of DataPoint objects to index. Each DataPoint's metadata must
contain an 'index_fields' list specifying which fields to embed.
Returns:
The original data_points list.
"""
data_points_by_type = {}
vector_engine = get_vector_engine()
for data_point in data_points:
data_point_type = type(data_point)
type_name = data_point_type.__name__
for field_name in data_point.metadata["index_fields"]:
if getattr(data_point, field_name, None) is None:
continue
index_name = f"{data_point_type.__name__}_{field_name}"
if type_name not in data_points_by_type:
data_points_by_type[type_name] = {}
if index_name not in created_indexes:
await vector_engine.create_vector_index(data_point_type.__name__, field_name)
created_indexes[index_name] = True
if index_name not in index_points:
index_points[index_name] = []
if field_name not in data_points_by_type[type_name]:
await vector_engine.create_vector_index(type_name, field_name)
data_points_by_type[type_name][field_name] = []
indexed_data_point = data_point.model_copy()
indexed_data_point.metadata["index_fields"] = [field_name]
index_points[index_name].append(indexed_data_point)
data_points_by_type[type_name][field_name].append(indexed_data_point)
tasks: list[asyncio.Task] = []
batch_size = vector_engine.embedding_engine.get_batch_size()
for index_name_and_field, points in index_points.items():
first = index_name_and_field.index("_")
index_name = index_name_and_field[:first]
field_name = index_name_and_field[first + 1 :]
batches = (
(type_name, field_name, points[i : i + batch_size])
for type_name, fields in data_points_by_type.items()
for field_name, points in fields.items()
for i in range(0, len(points), batch_size)
)
# Create embedding requests per batch to run in parallel later
for i in range(0, len(points), batch_size):
batch = points[i : i + batch_size]
tasks.append(
asyncio.create_task(vector_engine.index_data_points(index_name, field_name, batch))
)
tasks = [
asyncio.create_task(vector_engine.index_data_points(type_name, field_name, batch_points))
for type_name, field_name, batch_points in batches
]
# Run all embedding requests in parallel
await asyncio.gather(*tasks)
return data_points

View file

@ -1,17 +1,44 @@
import asyncio
from collections import Counter
from typing import Optional, Dict, Any, List, Tuple, Union
from cognee.modules.engine.utils.generate_edge_id import generate_edge_id
from cognee.shared.logging_utils import get_logger
from collections import Counter
from typing import Optional, Dict, Any, List, Tuple, Union
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.graph.models.EdgeType import EdgeType
from cognee.infrastructure.databases.graph.graph_db_interface import EdgeData
from cognee.tasks.storage.index_data_points import index_data_points
logger = get_logger()
def _get_edge_text(item: dict) -> str:
"""Extract edge text for embedding - prefers edge_text field with fallback."""
if "edge_text" in item:
return item["edge_text"]
if "relationship_name" in item:
return item["relationship_name"]
return ""
def create_edge_type_datapoints(edges_data) -> list[EdgeType]:
    """Transform raw edge data into EdgeType datapoints.

    Counts how often each edge text occurs across all edge items (only dict
    items carrying a "relationship_name" key are considered) and builds one
    EdgeType per distinct text with its occurrence count.
    """
    text_counts = Counter(
        _get_edge_text(item)
        for edge in edges_data
        for item in edge
        if isinstance(item, dict) and "relationship_name" in item
    )
    datapoints = []
    for text, count in text_counts.items():
        datapoints.append(
            EdgeType(id=generate_edge_id(edge_id=text), relationship_name=text, number_of_edges=count)
        )
    return datapoints
async def index_graph_edges(
edges_data: Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]] = None,
):
@ -23,24 +50,17 @@ async def index_graph_edges(
the `relationship_name` field.
Steps:
1. Initialize the vector engine and graph engine.
2. Retrieve graph edge data and count relationship types (`relationship_name`).
3. Create vector indexes for `relationship_name` if they don't exist.
4. Transform the counted relationships into `EdgeType` objects.
5. Index the transformed data points in the vector engine.
1. Initialize the graph engine if needed and retrieve edge data.
2. Transform edge data into EdgeType datapoints.
3. Index the EdgeType datapoints using the standard indexing function.
Raises:
RuntimeError: If initialization of the vector engine or graph engine fails.
RuntimeError: If initialization of the graph engine fails.
Returns:
None
"""
try:
created_indexes = {}
index_points = {}
vector_engine = get_vector_engine()
if edges_data is None:
graph_engine = await get_graph_engine()
_, edges_data = await graph_engine.get_graph_data()
@ -51,47 +71,7 @@ async def index_graph_edges(
logger.error("Failed to initialize engines: %s", e)
raise RuntimeError("Initialization error") from e
edge_types = Counter(
item.get("relationship_name")
for edge in edges_data
for item in edge
if isinstance(item, dict) and "relationship_name" in item
)
for text, count in edge_types.items():
edge = EdgeType(
id=generate_edge_id(edge_id=text), relationship_name=text, number_of_edges=count
)
data_point_type = type(edge)
for field_name in edge.metadata["index_fields"]:
index_name = f"{data_point_type.__name__}.{field_name}"
if index_name not in created_indexes:
await vector_engine.create_vector_index(data_point_type.__name__, field_name)
created_indexes[index_name] = True
if index_name not in index_points:
index_points[index_name] = []
indexed_data_point = edge.model_copy()
indexed_data_point.metadata["index_fields"] = [field_name]
index_points[index_name].append(indexed_data_point)
# Get maximum batch size for embedding model
batch_size = vector_engine.embedding_engine.get_batch_size()
tasks: list[asyncio.Task] = []
for index_name, indexable_points in index_points.items():
index_name, field_name = index_name.split(".")
# Create embedding tasks to run in parallel later
for start in range(0, len(indexable_points), batch_size):
batch = indexable_points[start : start + batch_size]
tasks.append(vector_engine.index_data_points(index_name, field_name, batch))
# Start all embedding tasks and wait for completion
await asyncio.gather(*tasks)
edge_type_datapoints = create_edge_type_datapoints(edges_data)
await index_data_points(edge_type_datapoints)
return None

View file

@ -39,12 +39,12 @@ async def main():
answer = await cognee.search("Do programmers change light bulbs?")
assert len(answer) != 0
lowercase_answer = answer[0].lower()
lowercase_answer = answer[0]["search_result"][0].lower()
assert ("no" in lowercase_answer) or ("none" in lowercase_answer)
answer = await cognee.search("What colours are there in the presentation table?")
assert len(answer) != 0
lowercase_answer = answer[0].lower()
lowercase_answer = answer[0]["search_result"][0].lower()
assert (
("red" in lowercase_answer)
and ("blue" in lowercase_answer)

View file

@ -16,9 +16,11 @@ import cognee
import pathlib
from cognee.infrastructure.databases.cache import get_cache_engine
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.search.types import SearchType
from cognee.shared.logging_utils import get_logger
from cognee.modules.users.methods import get_default_user
from collections import Counter
logger = get_logger()
@ -188,7 +190,6 @@ async def main():
f"GRAPH_SUMMARY_COMPLETION should return non-empty list, got: {result_summary!r}"
)
# Verify saved
history_summary = await cache_engine.get_latest_qa(str(user.id), session_id_summary, last_n=10)
our_qa_summary = [
h for h in history_summary if h["question"] == "What are the key points about TechCorp?"
@ -228,6 +229,46 @@ async def main():
assert "CONTEXT:" in formatted_history, "Formatted history should contain 'CONTEXT:' prefix"
assert "ANSWER:" in formatted_history, "Formatted history should contain 'ANSWER:' prefix"
from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import (
persist_sessions_in_knowledge_graph_pipeline,
)
logger.info("Starting persist_sessions_in_knowledge_graph tests")
await persist_sessions_in_knowledge_graph_pipeline(
user=user,
session_ids=[session_id_1, session_id_2],
dataset=dataset_name,
run_in_background=False,
)
graph_engine = await get_graph_engine()
graph = await graph_engine.get_graph_data()
type_counts = Counter(node_data[1].get("type", {}) for node_data in graph[0])
"Tests the correct number of NodeSet nodes after session persistence"
assert type_counts.get("NodeSet", 0) == 1, (
f"Number of NodeSets in the graph is incorrect, found {type_counts.get('NodeSet', 0)} but there should be exactly 1."
)
"Tests the correct number of DocumentChunk nodes after session persistence"
assert type_counts.get("DocumentChunk", 0) == 4, (
f"Number of DocumentChunk ndoes in the graph is incorrect, found {type_counts.get('DocumentChunk', 0)} but there should be exactly 4 (2 original documents, 2 sessions)."
)
from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine
vector_engine = get_vector_engine()
collection_size = await vector_engine.search(
collection_name="DocumentChunk_text",
query_text="test",
limit=1000,
)
assert len(collection_size) == 4, (
f"DocumentChunk_text collection should have exactly 4 embeddings, found {len(collection_size)}"
)
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)

View file

@ -52,6 +52,33 @@ async def test_edge_ingestion():
edge_type_counts = Counter(edge_type[2] for edge_type in graph[1])
"Tests edge_text presence and format"
contains_edges = [edge for edge in graph[1] if edge[2] == "contains"]
assert len(contains_edges) > 0, "Expected at least one contains edge for edge_text verification"
edge_properties = contains_edges[0][3]
assert "edge_text" in edge_properties, "Expected edge_text in edge properties"
edge_text = edge_properties["edge_text"]
assert "relationship_name: contains" in edge_text, (
f"Expected 'relationship_name: contains' in edge_text, got: {edge_text}"
)
assert "entity_name:" in edge_text, f"Expected 'entity_name:' in edge_text, got: {edge_text}"
assert "entity_description:" in edge_text, (
f"Expected 'entity_description:' in edge_text, got: {edge_text}"
)
all_edge_texts = [
edge[3].get("edge_text", "") for edge in contains_edges if "edge_text" in edge[3]
]
expected_entities = ["dave", "ana", "bob", "dexter", "apples", "cognee"]
found_entity = any(
any(entity in text.lower() for entity in expected_entities) for text in all_edge_texts
)
assert found_entity, (
f"Expected to find at least one entity name in edge_text: {all_edge_texts[:3]}"
)
"Tests the presence of basic nested edges"
for basic_nested_edge in basic_nested_edges:
assert edge_type_counts.get(basic_nested_edge, 0) >= 1, (

View file

@ -133,7 +133,7 @@ async def main():
extraction_tasks=extraction_tasks,
enrichment_tasks=enrichment_tasks,
data=[{}],
dataset="feedback_enrichment_test_memify",
dataset=dataset_name,
)
nodes_after, edges_after = await graph_engine.get_graph_data()

View file

@ -90,15 +90,17 @@ async def main():
)
search_results = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION, query_text="What information do you contain?"
query_type=SearchType.GRAPH_COMPLETION,
query_text="What information do you contain?",
dataset_ids=[pipeline_run_obj.dataset_id],
)
assert "Mark" in search_results[0], (
assert "Mark" in search_results[0]["search_result"][0], (
"Failed to update document, no mention of Mark in search results"
)
assert "Cindy" in search_results[0], (
assert "Cindy" in search_results[0]["search_result"][0], (
"Failed to update document, no mention of Cindy in search results"
)
assert "Artificial intelligence" not in search_results[0], (
assert "Artificial intelligence" not in search_results[0]["search_result"][0], (
"Failed to update document, Artificial intelligence still mentioned in search results"
)

62
cognee/tests/test_load.py Normal file
View file

@ -0,0 +1,62 @@
import os
import pathlib
import asyncio
import time
import cognee
from cognee.modules.search.types import SearchType
from cognee.shared.logging_utils import get_logger
logger = get_logger()
async def process_and_search(num_of_searches):
    """Cognify the currently added data, then fire ``num_of_searches``
    concurrent GRAPH_COMPLETION searches; return total elapsed seconds."""
    started = time.time()
    await cognee.cognify()
    # Build all search coroutines up front, then run them concurrently.
    search_jobs = [
        cognee.search(
            query_text="Tell me about the document", query_type=SearchType.GRAPH_COMPLETION
        )
        for _ in range(num_of_searches)
    ]
    await asyncio.gather(*search_jobs)
    return time.time() - started
async def main():
    """Load test: ingest an S3 bucket of PDFs several times and assert that
    cognify plus concurrent searches stay within the expected time budget."""
    test_root = pathlib.Path(__file__).parent
    cognee.config.data_root_directory(os.path.join(test_root, ".data_storage/test_load"))
    cognee.config.system_root_directory(os.path.join(test_root, ".cognee_system/test_load"))

    num_of_pdfs = 10
    num_of_reps = 5
    upper_boundary_minutes = 10
    average_minutes = 8

    recorded_times = []
    for _ in range(num_of_reps):
        # Fresh slate per repetition so timings are comparable across runs.
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        await cognee.add("s3://cognee-test-load-s3-bucket")
        recorded_times.append(await process_and_search(num_of_pdfs))

    mean_seconds = sum(recorded_times) / len(recorded_times)
    # Average must stay under the soft budget; no single run may exceed the hard cap.
    assert mean_seconds <= average_minutes * 60
    assert all(elapsed <= upper_boundary_minutes * 60 for elapsed in recorded_times)


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -27,6 +27,9 @@ def normalize_node_name(node_name: str) -> str:
async def setup_test_db():
# Disable backend access control to migrate relational data
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)

View file

@ -146,7 +146,13 @@ async def main():
assert len(search_results) == 1, (
f"{name}: expected single-element list, got {len(search_results)}"
)
text = search_results[0]
from cognee.context_global_variables import backend_access_control_enabled
if backend_access_control_enabled():
text = search_results[0]["search_result"][0]
else:
text = search_results[0]
assert isinstance(text, str), f"{name}: element should be a string"
assert text.strip(), f"{name}: string should not be empty"
assert "netherlands" in text.lower(), (

View file

@ -1,3 +1,4 @@
import os
import pytest
from unittest.mock import patch, AsyncMock, MagicMock
from uuid import uuid4
@ -5,8 +6,6 @@ from fastapi.testclient import TestClient
from types import SimpleNamespace
import importlib
from cognee.api.client import app
# Fixtures for reuse across test classes
@pytest.fixture
@ -32,6 +31,10 @@ def mock_authenticated_user():
)
# To turn off authentication we need to set the environment variable before importing the module
# Also both require_authentication and backend access control must be false
os.environ["REQUIRE_AUTHENTICATION"] = "false"
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user")
@ -40,6 +43,8 @@ class TestConditionalAuthenticationEndpoints:
@pytest.fixture
def client(self):
from cognee.api.client import app
"""Create a test client."""
return TestClient(app)
@ -133,6 +138,8 @@ class TestConditionalAuthenticationBehavior:
@pytest.fixture
def client(self):
from cognee.api.client import app
return TestClient(app)
@pytest.mark.parametrize(
@ -209,6 +216,8 @@ class TestConditionalAuthenticationErrorHandling:
@pytest.fixture
def client(self):
from cognee.api.client import app
return TestClient(app)
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
@ -232,7 +241,7 @@ class TestConditionalAuthenticationErrorHandling:
# The exact error message may vary depending on the actual database connection
# The important thing is that we get a 500 error when user creation fails
def test_current_environment_configuration(self):
def test_current_environment_configuration(self, client):
"""Test that current environment configuration is working properly."""
# This tests the actual module state without trying to change it
from cognee.modules.users.methods.get_authenticated_user import (

View file

@ -0,0 +1,27 @@
import pytest
from unittest.mock import AsyncMock, patch, MagicMock
from cognee.tasks.storage.index_data_points import index_data_points
from cognee.infrastructure.engine import DataPoint
class TestDataPoint(DataPoint):
    # Minimal DataPoint subclass used as an indexing fixture; `name` is the
    # only embeddable field, declared via metadata["index_fields"].
    # `__test__ = False` stops pytest from trying to collect this class as a
    # test case (its name matches the `Test*` collection pattern, which would
    # otherwise emit a PytestCollectionWarning).
    __test__ = False

    name: str
    metadata: dict = {"index_fields": ["name"]}
@pytest.mark.asyncio
async def test_index_data_points_calls_vector_engine():
    """index_data_points should create a vector index and hand the points to
    the vector engine for embedding."""
    vector_engine = AsyncMock()
    vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100)

    points = [TestDataPoint(name="test1")]
    # Swap the engine factory inside the task's module globals so the task
    # talks to our mock instead of a real vector database.
    with patch.dict(index_data_points.__globals__, {"get_vector_engine": lambda: vector_engine}):
        await index_data_points(points)

    assert vector_engine.create_vector_index.await_count >= 1
    assert vector_engine.index_data_points.await_count >= 1

View file

@ -5,8 +5,7 @@ from cognee.tasks.storage.index_graph_edges import index_graph_edges
@pytest.mark.asyncio
async def test_index_graph_edges_success():
"""Test that index_graph_edges uses the index datapoints and creates vector index."""
# Create the mocks for the graph and vector engines.
"""Test that index_graph_edges retrieves edges and delegates to index_data_points."""
mock_graph_engine = AsyncMock()
mock_graph_engine.get_graph_data.return_value = (
None,
@ -15,26 +14,23 @@ async def test_index_graph_edges_success():
[{"relationship_name": "rel2"}],
],
)
mock_vector_engine = AsyncMock()
mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100)
mock_index_data_points = AsyncMock()
# Patch the globals of the function so that when it does:
# vector_engine = get_vector_engine()
# graph_engine = await get_graph_engine()
# it uses the mocked versions.
with patch.dict(
index_graph_edges.__globals__,
{
"get_graph_engine": AsyncMock(return_value=mock_graph_engine),
"get_vector_engine": lambda: mock_vector_engine,
"index_data_points": mock_index_data_points,
},
):
await index_graph_edges()
# Assertions on the mock calls.
mock_graph_engine.get_graph_data.assert_awaited_once()
assert mock_vector_engine.create_vector_index.await_count == 1
assert mock_vector_engine.index_data_points.await_count == 1
mock_index_data_points.assert_awaited_once()
call_args = mock_index_data_points.call_args[0][0]
assert len(call_args) == 2
assert all(hasattr(item, "relationship_name") for item in call_args)
@pytest.mark.asyncio
@ -42,20 +38,22 @@ async def test_index_graph_edges_no_relationships():
"""Test that index_graph_edges handles empty relationships correctly."""
mock_graph_engine = AsyncMock()
mock_graph_engine.get_graph_data.return_value = (None, [])
mock_vector_engine = AsyncMock()
mock_index_data_points = AsyncMock()
with patch.dict(
index_graph_edges.__globals__,
{
"get_graph_engine": AsyncMock(return_value=mock_graph_engine),
"get_vector_engine": lambda: mock_vector_engine,
"index_data_points": mock_index_data_points,
},
):
await index_graph_edges()
mock_graph_engine.get_graph_data.assert_awaited_once()
mock_vector_engine.create_vector_index.assert_not_awaited()
mock_vector_engine.index_data_points.assert_not_awaited()
mock_index_data_points.assert_awaited_once()
call_args = mock_index_data_points.call_args[0][0]
assert len(call_args) == 0
@pytest.mark.asyncio

View file

@ -0,0 +1,107 @@
import pytest
from unittest.mock import AsyncMock, patch
from cognee.tasks.memify.cognify_session import cognify_session
from cognee.exceptions import CogneeValidationError, CogneeSystemError
@pytest.mark.asyncio
async def test_cognify_session_success():
    """A well-formed session transcript is added and cognified exactly once."""
    session_data = (
        "Session ID: test_session\n\nQuestion: What is AI?\n\nAnswer: AI is artificial intelligence"
    )
    with (
        patch("cognee.add", new_callable=AsyncMock) as add_mock,
        patch("cognee.cognify", new_callable=AsyncMock) as cognify_mock,
    ):
        await cognify_session(session_data)

    # The transcript goes into the dedicated session node set.
    add_mock.assert_called_once_with(session_data, node_set=["user_sessions_from_cache"])
    cognify_mock.assert_called_once()
@pytest.mark.asyncio
async def test_cognify_session_empty_string():
    """An empty string is rejected before any pipeline work happens."""
    with pytest.raises(CogneeValidationError, match="Session data cannot be empty"):
        await cognify_session("")
@pytest.mark.asyncio
async def test_cognify_session_whitespace_string():
    """Whitespace-only input counts as empty and is rejected."""
    with pytest.raises(CogneeValidationError, match="Session data cannot be empty"):
        await cognify_session(" \n\t ")
@pytest.mark.asyncio
async def test_cognify_session_none_data():
    """None is treated like empty input and rejected with a validation error."""
    with pytest.raises(CogneeValidationError, match="Session data cannot be empty"):
        await cognify_session(None)
@pytest.mark.asyncio
async def test_cognify_session_add_failure():
    """A failure inside cognee.add is wrapped as CogneeSystemError with context."""
    session_data = "Session ID: test\n\nQuestion: test?"
    with (
        patch("cognee.add", new_callable=AsyncMock) as add_mock,
        patch("cognee.cognify", new_callable=AsyncMock),
    ):
        add_mock.side_effect = Exception("Add operation failed")
        with pytest.raises(CogneeSystemError) as err:
            await cognify_session(session_data)

    # The wrapper message keeps both the generic prefix and the root cause.
    message = str(err.value)
    assert "Failed to cognify session data" in message
    assert "Add operation failed" in message
@pytest.mark.asyncio
async def test_cognify_session_cognify_failure():
    """A failure inside cognee.cognify is wrapped as CogneeSystemError with context."""
    session_data = "Session ID: test\n\nQuestion: test?"
    with (
        patch("cognee.add", new_callable=AsyncMock),
        patch("cognee.cognify", new_callable=AsyncMock) as cognify_mock,
    ):
        cognify_mock.side_effect = Exception("Cognify operation failed")
        with pytest.raises(CogneeSystemError) as err:
            await cognify_session(session_data)

    message = str(err.value)
    assert "Failed to cognify session data" in message
    assert "Cognify operation failed" in message
@pytest.mark.asyncio
async def test_cognify_session_re_raises_validation_error():
    """Test that CogneeValidationError is re-raised as-is, i.e. not wrapped
    into CogneeSystemError by the generic failure path."""
    with pytest.raises(CogneeValidationError):
        await cognify_session("")
@pytest.mark.asyncio
async def test_cognify_session_with_special_characters():
    """Non-ASCII symbols and punctuation in the transcript pass through unchanged."""
    session_data = "Session: test™ © Question: What's special? Answer: Cognee is special!"
    with (
        patch("cognee.add", new_callable=AsyncMock) as add_mock,
        patch("cognee.cognify", new_callable=AsyncMock) as cognify_mock,
    ):
        await cognify_session(session_data)

    add_mock.assert_called_once_with(session_data, node_set=["user_sessions_from_cache"])
    cognify_mock.assert_called_once()

View file

@ -0,0 +1,175 @@
import sys
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from cognee.tasks.memify.extract_user_sessions import extract_user_sessions
from cognee.exceptions import CogneeSystemError
from cognee.modules.users.models import User
# Get the actual module object (not the function) for patching
extract_user_sessions_module = sys.modules["cognee.tasks.memify.extract_user_sessions"]
@pytest.fixture
def mock_user():
    """A User double exposing only the `id` attribute the tasks read."""
    fake_user = MagicMock(spec=User)
    fake_user.id = "test-user-123"
    return fake_user
@pytest.fixture
def mock_qa_data():
    """Two chronologically ordered Q&A records, shaped like the payload the
    tests feed through the mocked cache engine's get_all_qas()."""
    return [
        {
            "question": "What is cognee?",
            "context": "context about cognee",
            "answer": "Cognee is a knowledge graph solution",
            "time": "2025-01-01T12:00:00",
        },
        {
            "question": "How does it work?",
            "context": "how it works context",
            "answer": "It processes data and creates graphs",
            "time": "2025-01-01T12:05:00",
        },
    ]
@pytest.mark.asyncio
async def test_extract_user_sessions_success(mock_user, mock_qa_data):
    """One session id yields one formatted transcript containing every Q&A pair."""
    cache = AsyncMock()
    cache.get_all_qas.return_value = mock_qa_data

    with (
        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
        patch.object(extract_user_sessions_module, "get_cache_engine", return_value=cache),
    ):
        session_user_mock.get.return_value = mock_user
        collected = [s async for s in extract_user_sessions([{}], session_ids=["test_session"])]

    assert len(collected) == 1
    transcript = collected[0]
    assert "Session ID: test_session" in transcript
    assert "Question: What is cognee?" in transcript
    assert "Answer: Cognee is a knowledge graph solution" in transcript
    assert "Question: How does it work?" in transcript
    assert "Answer: It processes data and creates graphs" in transcript
@pytest.mark.asyncio
async def test_extract_user_sessions_multiple_sessions(mock_user, mock_qa_data):
    """Each requested session id produces its own transcript and cache lookup."""
    cache = AsyncMock()
    cache.get_all_qas.return_value = mock_qa_data

    with (
        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
        patch.object(extract_user_sessions_module, "get_cache_engine", return_value=cache),
    ):
        session_user_mock.get.return_value = mock_user
        collected = [
            s async for s in extract_user_sessions([{}], session_ids=["session1", "session2"])
        ]

    assert len(collected) == 2
    assert cache.get_all_qas.call_count == 2
@pytest.mark.asyncio
async def test_extract_user_sessions_no_data(mock_user, mock_qa_data):
    """data=None is tolerated; extraction is driven purely by session_ids."""
    cache = AsyncMock()
    cache.get_all_qas.return_value = mock_qa_data

    with (
        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
        patch.object(extract_user_sessions_module, "get_cache_engine", return_value=cache),
    ):
        session_user_mock.get.return_value = mock_user
        collected = [s async for s in extract_user_sessions(None, session_ids=["test_session"])]

    assert len(collected) == 1
@pytest.mark.asyncio
async def test_extract_user_sessions_no_session_ids(mock_user):
    """With session_ids=None nothing is yielded and the cache is never queried."""
    cache = AsyncMock()

    with (
        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
        patch.object(extract_user_sessions_module, "get_cache_engine", return_value=cache),
    ):
        session_user_mock.get.return_value = mock_user
        collected = [s async for s in extract_user_sessions([{}], session_ids=None)]

    assert len(collected) == 0
    cache.get_all_qas.assert_not_called()
@pytest.mark.asyncio
async def test_extract_user_sessions_empty_qa_data(mock_user):
    """A session with no cached Q&A entries produces no transcript."""
    cache = AsyncMock()
    cache.get_all_qas.return_value = []

    with (
        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
        patch.object(extract_user_sessions_module, "get_cache_engine", return_value=cache),
    ):
        session_user_mock.get.return_value = mock_user
        collected = [s async for s in extract_user_sessions([{}], session_ids=["empty_session"])]

    assert len(collected) == 0
@pytest.mark.asyncio
async def test_extract_user_sessions_cache_error_handling(mock_user, mock_qa_data):
    """A cache failure for one session is skipped; the other sessions still yield."""
    cache = AsyncMock()
    # Second lookup raises; first and third succeed.
    cache.get_all_qas.side_effect = [
        mock_qa_data,
        Exception("Cache error"),
        mock_qa_data,
    ]

    with (
        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
        patch.object(extract_user_sessions_module, "get_cache_engine", return_value=cache),
    ):
        session_user_mock.get.return_value = mock_user
        collected = [
            s
            async for s in extract_user_sessions(
                [{}], session_ids=["session1", "session2", "session3"]
            )
        ]

    assert len(collected) == 2

View file

@ -107,29 +107,10 @@ class TestConditionalAuthenticationIntegration:
# REQUIRE_AUTHENTICATION should be a boolean
assert isinstance(REQUIRE_AUTHENTICATION, bool)
# Currently should be False (optional authentication)
assert not REQUIRE_AUTHENTICATION
class TestConditionalAuthenticationEnvironmentVariables:
"""Test environment variable handling."""
def test_require_authentication_default_false(self):
"""Test that REQUIRE_AUTHENTICATION defaults to false when imported with no env vars."""
with patch.dict(os.environ, {}, clear=True):
# Remove module from cache to force fresh import
module_name = "cognee.modules.users.methods.get_authenticated_user"
if module_name in sys.modules:
del sys.modules[module_name]
# Import after patching environment - module will see empty environment
from cognee.modules.users.methods.get_authenticated_user import (
REQUIRE_AUTHENTICATION,
)
importlib.invalidate_caches()
assert not REQUIRE_AUTHENTICATION
def test_require_authentication_true(self):
"""Test that REQUIRE_AUTHENTICATION=true is parsed correctly when imported."""
with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "true"}):
@ -145,50 +126,6 @@ class TestConditionalAuthenticationEnvironmentVariables:
assert REQUIRE_AUTHENTICATION
def test_require_authentication_false_explicit(self):
"""Test that REQUIRE_AUTHENTICATION=false is parsed correctly when imported."""
with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "false"}):
# Remove module from cache to force fresh import
module_name = "cognee.modules.users.methods.get_authenticated_user"
if module_name in sys.modules:
del sys.modules[module_name]
# Import after patching environment - module will see REQUIRE_AUTHENTICATION=false
from cognee.modules.users.methods.get_authenticated_user import (
REQUIRE_AUTHENTICATION,
)
assert not REQUIRE_AUTHENTICATION
def test_require_authentication_case_insensitive(self):
"""Test that environment variable parsing is case insensitive when imported."""
test_cases = ["TRUE", "True", "tRuE", "FALSE", "False", "fAlSe"]
for case in test_cases:
with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": case}):
# Remove module from cache to force fresh import
module_name = "cognee.modules.users.methods.get_authenticated_user"
if module_name in sys.modules:
del sys.modules[module_name]
# Import after patching environment
from cognee.modules.users.methods.get_authenticated_user import (
REQUIRE_AUTHENTICATION,
)
expected = case.lower() == "true"
assert REQUIRE_AUTHENTICATION == expected, f"Failed for case: {case}"
def test_current_require_authentication_value(self):
"""Test that the current REQUIRE_AUTHENTICATION module value is as expected."""
from cognee.modules.users.methods.get_authenticated_user import (
REQUIRE_AUTHENTICATION,
)
# The module-level variable should currently be False (set at import time)
assert isinstance(REQUIRE_AUTHENTICATION, bool)
assert not REQUIRE_AUTHENTICATION
class TestConditionalAuthenticationEdgeCases:
"""Test edge cases and error scenarios."""

View file

@ -13,7 +13,7 @@ services:
- DEBUG=false # Change to true if debugging
- HOST=0.0.0.0
- ENVIRONMENT=local
- LOG_LEVEL=ERROR
- LOG_LEVEL=INFO
extra_hosts:
# Allows the container to reach your local machine using "host.docker.internal" instead of "localhost"
- "host.docker.internal:host-gateway"

View file

@ -43,10 +43,10 @@ sleep 2
if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
if [ "$DEBUG" = "true" ]; then
echo "Waiting for the debugger to attach..."
debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload cognee.api.client:app
exec debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload --access-logfile - --error-logfile - cognee.api.client:app
else
gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload cognee.api.client:app
exec gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload --access-logfile - --error-logfile - cognee.api.client:app
fi
else
gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level error cognee.api.client:app
exec gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level error --access-logfile - --error-logfile - cognee.api.client:app
fi

View file

@ -168,7 +168,7 @@ async def run_procurement_example():
for q in questions:
print(f"Question: \n{q}")
results = await procurement_system.search_memory(q, search_categories=[category])
top_answer = results[category][0]
top_answer = results[category][0]["search_result"][0]
print(f"Answer: \n{top_answer.strip()}\n")
research_notes[category].append({"question": q, "answer": top_answer})

View file

@ -1,5 +1,7 @@
import argparse
import asyncio
import os
import cognee
from cognee import SearchType
from cognee.shared.logging_utils import setup_logging, ERROR
@ -8,6 +10,9 @@ from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
async def main(repo_path, include_docs):
# Disable permissions feature for this example
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
run_status = False
async for run_status in run_code_graph_pipeline(repo_path, include_docs=include_docs):
run_status = run_status

View file

@ -0,0 +1,98 @@
import asyncio
import cognee
from cognee import visualize_graph
from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import (
persist_sessions_in_knowledge_graph_pipeline,
)
from cognee.modules.search.types import SearchType
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import get_logger
logger = get_logger("conversation_session_persistence_example")
async def main():
    """End-to-end example: build a small knowledge graph, hold two chat
    sessions against it, then persist both cached sessions back into the
    graph and render a visualization.

    NOTE: CACHING has to be enabled for this example to work.
    """
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    text_1 = "Cognee is a solution that can build knowledge graph from text, creating an AI memory system"
    text_2 = "Germany is a country located next to the Netherlands"
    await cognee.add([text_1, text_2])
    await cognee.cognify()

    async def ask(question, session_id=None):
        # Run one GRAPH_COMPLETION search and print the Q/A for its session.
        # When session_id is None, cognee.search uses its implicit default
        # session, so the kwarg is omitted entirely.
        search_kwargs = {"query_type": SearchType.GRAPH_COMPLETION, "query_text": question}
        if session_id is not None:
            search_kwargs["session_id"] = session_id
        search_results = await cognee.search(**search_kwargs)
        print(f"\nSession ID: {session_id or 'default_session'}")
        print(f"Question: {question}")
        print(f"Answer: {search_results}\n")

    # First conversation runs in the implicit default session.
    await ask("What can I use to create a knowledge graph?")
    await ask("You sure about that?")
    await ask("This is awesome!")

    # Second conversation is isolated in its own session.
    await ask("Where is Germany?", session_id="different_session")
    await ask("Next to which country again?", session_id="different_session")
    await ask("So you remember everything I asked from you?", session_id="different_session")

    # Persist both cached sessions into the knowledge graph, then render it.
    session_ids_to_persist = ["default_session", "different_session"]
    default_user = await get_default_user()
    await persist_sessions_in_knowledge_graph_pipeline(
        user=default_user,
        session_ids=session_ids_to_persist,
    )
    await visualize_graph()
if __name__ == "__main__":
    # Manage the loop manually (instead of asyncio.run) so async generators
    # are shut down explicitly before the interpreter exits.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())

View file

@ -67,7 +67,6 @@ async def run_feedback_enrichment_memify(last_n: int = 5):
extraction_tasks=extraction_tasks,
enrichment_tasks=enrichment_tasks,
data=[{}], # A placeholder to prevent fetching the entire graph
dataset="feedback_enrichment_minimal",
)

View file

@ -89,7 +89,7 @@ async def main():
)
print("Coding rules created by memify:")
for coding_rule in coding_rules:
for coding_rule in coding_rules[0]["search_result"][0]:
print("- " + coding_rule)
# Visualize new graph with added memify context

View file

@ -31,6 +31,9 @@ from cognee.infrastructure.databases.vector.pgvector import (
async def main():
# Disable backend access control to migrate relational data
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
# Clean all data stored in Cognee
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)

View file

@ -0,0 +1,84 @@
import asyncio
import cognee
from cognee.modules.engine.operations.setup import setup
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import setup_logging, INFO
from cognee.modules.pipelines import Task
from cognee.api.v1.search import SearchType
# Prerequisites:
# 1. Copy `.env.template` and rename it to `.env`.
# 2. Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field:
# LLM_API_KEY = "your_key_here"
async def main():
    """Rebuild cognee's built-in `add` and `cognify` pipelines on top of the
    custom-pipeline framework, then query the resulting knowledge graph.

    Steps: reset state -> set up relational tables -> run an add_pipeline over
    a short NLP text -> run a cognify_pipeline with the default task list ->
    issue one GRAPH_COMPLETION search and print the results.
    """
    # Create a clean slate for cognee -- reset data and system state
    print("Resetting cognee data...")
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    print("Data reset complete.\n")
    # Create relational database and tables
    await setup()
    # cognee knowledge graph will be created based on this text
    text = """
    Natural language processing (NLP) is an interdisciplinary
    subfield of computer science and information retrieval.
    """
    print("Adding text to cognee:")
    print(text.strip())
    # Let's recreate the cognee add pipeline through the custom pipeline framework
    # (imported here, mid-function, to keep the example's prerequisites local)
    from cognee.tasks.ingestion import ingest_data, resolve_data_directories
    user = await get_default_user()
    # Values for tasks need to be filled before calling the pipeline
    add_tasks = [
        Task(resolve_data_directories, include_subdirectories=True),
        Task(
            ingest_data,
            "main_dataset",
            user,
        ),
    ]
    # Forward tasks to custom pipeline along with data and user information
    await cognee.run_custom_pipeline(
        tasks=add_tasks, data=text, user=user, dataset="main_dataset", pipeline_name="add_pipeline"
    )
    print("Text added successfully.\n")
    # Use LLMs and cognee to create knowledge graph via the stock cognify tasks
    from cognee.api.v1.cognify.cognify import get_default_tasks
    cognify_tasks = await get_default_tasks(user=user)
    print("Recreating existing cognify pipeline in custom pipeline to create knowledge graph...\n")
    await cognee.run_custom_pipeline(
        tasks=cognify_tasks, user=user, dataset="main_dataset", pipeline_name="cognify_pipeline"
    )
    print("Cognify process complete.\n")
    query_text = "Tell me about NLP"
    print(f"Searching cognee for insights with query: '{query_text}'")
    # Query cognee for insights on the added text
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text=query_text
    )
    print("Search results:")
    # Display results
    for result_text in search_results:
        print(result_text)
if __name__ == "__main__":
    logger = setup_logging(log_level=INFO)
    # Manage the loop manually (instead of asyncio.run) so async generators
    # are shut down explicitly before the interpreter exits.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())

View file

@@ -59,14 +59,6 @@ async def main():
for result_text in search_results:
print(result_text)
# Example output:
# ({'id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'natural language processing', 'description': 'An interdisciplinary subfield of computer science and information retrieval.'}, {'relationship_name': 'is_a_subfield_of', 'source_node_id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'target_node_id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 15, 473137, tzinfo=datetime.timezone.utc)}, {'id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'computer science', 'description': 'The study of computation and information processing.'})
# (...)
# It represents nodes and relationships in the knowledge graph:
# - The first element is the source node (e.g., 'natural language processing').
# - The second element is the relationship between nodes (e.g., 'is_a_subfield_of').
# - The third element is the target node (e.g., 'computer science').
if __name__ == "__main__":
logger = setup_logging(log_level=ERROR)

View file

@@ -77,6 +77,7 @@ async def main():
"What happened between 2000 and 2006?",
"What happened between 1903 and 1995, I am interested in the Selected Works of Arnulf Øverland Ole Peter Arnulf Øverland?",
"Who is Attaphol Buspakom Attaphol Buspakom?",
"Who was Arnulf Øverland?",
]
for query_text in queries:

View file

@@ -1,7 +1,7 @@
[project]
name = "cognee"
version = "0.3.7.dev1"
version = "0.3.9"
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
authors = [
{ name = "Vasilije Markovic" },

10
uv.lock generated
View file

@@ -929,7 +929,7 @@ wheels = [
[[package]]
name = "cognee"
version = "0.3.7.dev1"
version = "0.3.9"
source = { editable = "." }
dependencies = [
{ name = "aiofiles" },
@@ -6226,8 +6226,10 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8c/df/16848771155e7c419c60afeb24950b8aaa3ab09c0a091ec3ccca26a574d0/psycopg2_binary-2.9.11-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c47676e5b485393f069b4d7a811267d3168ce46f988fa602658b8bb901e9e64d", size = 4410873, upload-time = "2025-10-10T11:10:38.951Z" },
{ url = "https://files.pythonhosted.org/packages/43/79/5ef5f32621abd5a541b89b04231fe959a9b327c874a1d41156041c75494b/psycopg2_binary-2.9.11-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a28d8c01a7b27a1e3265b11250ba7557e5f72b5ee9e5f3a2fa8d2949c29bf5d2", size = 4468016, upload-time = "2025-10-10T11:10:43.319Z" },
{ url = "https://files.pythonhosted.org/packages/f0/9b/d7542d0f7ad78f57385971f426704776d7b310f5219ed58da5d605b1892e/psycopg2_binary-2.9.11-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5f3f2732cf504a1aa9e9609d02f79bea1067d99edf844ab92c247bbca143303b", size = 4164996, upload-time = "2025-10-10T11:10:46.705Z" },
{ url = "https://files.pythonhosted.org/packages/14/ed/e409388b537fa7414330687936917c522f6a77a13474e4238219fcfd9a84/psycopg2_binary-2.9.11-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:865f9945ed1b3950d968ec4690ce68c55019d79e4497366d36e090327ce7db14", size = 3981881, upload-time = "2025-10-30T02:54:57.182Z" },
{ url = "https://files.pythonhosted.org/packages/bf/30/50e330e63bb05efc6fa7c1447df3e08954894025ca3dcb396ecc6739bc26/psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:91537a8df2bde69b1c1db01d6d944c831ca793952e4f57892600e96cee95f2cd", size = 3650857, upload-time = "2025-10-10T11:10:50.112Z" },
{ url = "https://files.pythonhosted.org/packages/f0/e0/4026e4c12bb49dd028756c5b0bc4c572319f2d8f1c9008e0dad8cc9addd7/psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4dca1f356a67ecb68c81a7bc7809f1569ad9e152ce7fd02c2f2036862ca9f66b", size = 3296063, upload-time = "2025-10-10T11:10:54.089Z" },
{ url = "https://files.pythonhosted.org/packages/2c/34/eb172be293c886fef5299fe5c3fcf180a05478be89856067881007934a7c/psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:0da4de5c1ac69d94ed4364b6cbe7190c1a70d325f112ba783d83f8440285f152", size = 3043464, upload-time = "2025-10-30T02:55:02.483Z" },
{ url = "https://files.pythonhosted.org/packages/18/1c/532c5d2cb11986372f14b798a95f2eaafe5779334f6a80589a68b5fcf769/psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37d8412565a7267f7d79e29ab66876e55cb5e8e7b3bbf94f8206f6795f8f7e7e", size = 3345378, upload-time = "2025-10-10T11:11:01.039Z" },
{ url = "https://files.pythonhosted.org/packages/70/e7/de420e1cf16f838e1fa17b1120e83afff374c7c0130d088dba6286fcf8ea/psycopg2_binary-2.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:c665f01ec8ab273a61c62beeb8cce3014c214429ced8a308ca1fc410ecac3a39", size = 2713904, upload-time = "2025-10-10T11:11:04.81Z" },
{ url = "https://files.pythonhosted.org/packages/c7/ae/8d8266f6dd183ab4d48b95b9674034e1b482a3f8619b33a0d86438694577/psycopg2_binary-2.9.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0e8480afd62362d0a6a27dd09e4ca2def6fa50ed3a4e7c09165266106b2ffa10", size = 3756452, upload-time = "2025-10-10T11:11:11.583Z" },
@@ -6235,8 +6237,10 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/48/89/3fdb5902bdab8868bbedc1c6e6023a4e08112ceac5db97fc2012060e0c9a/psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e164359396576a3cc701ba8af4751ae68a07235d7a380c631184a611220d9a4", size = 4410955, upload-time = "2025-10-10T11:11:21.21Z" },
{ url = "https://files.pythonhosted.org/packages/ce/24/e18339c407a13c72b336e0d9013fbbbde77b6fd13e853979019a1269519c/psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d57c9c387660b8893093459738b6abddbb30a7eab058b77b0d0d1c7d521ddfd7", size = 4468007, upload-time = "2025-10-10T11:11:24.831Z" },
{ url = "https://files.pythonhosted.org/packages/91/7e/b8441e831a0f16c159b5381698f9f7f7ed54b77d57bc9c5f99144cc78232/psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2c226ef95eb2250974bf6fa7a842082b31f68385c4f3268370e3f3870e7859ee", size = 4165012, upload-time = "2025-10-10T11:11:29.51Z" },
{ url = "https://files.pythonhosted.org/packages/0d/61/4aa89eeb6d751f05178a13da95516c036e27468c5d4d2509bb1e15341c81/psycopg2_binary-2.9.11-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a311f1edc9967723d3511ea7d2708e2c3592e3405677bf53d5c7246753591fbb", size = 3981881, upload-time = "2025-10-30T02:55:07.332Z" },
{ url = "https://files.pythonhosted.org/packages/76/a1/2f5841cae4c635a9459fe7aca8ed771336e9383b6429e05c01267b0774cf/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb415404821b6d1c47353ebe9c8645967a5235e6d88f914147e7fd411419e6f", size = 3650985, upload-time = "2025-10-10T11:11:34.975Z" },
{ url = "https://files.pythonhosted.org/packages/84/74/4defcac9d002bca5709951b975173c8c2fa968e1a95dc713f61b3a8d3b6a/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f07c9c4a5093258a03b28fab9b4f151aa376989e7f35f855088234e656ee6a94", size = 3296039, upload-time = "2025-10-10T11:11:40.432Z" },
{ url = "https://files.pythonhosted.org/packages/6d/c2/782a3c64403d8ce35b5c50e1b684412cf94f171dc18111be8c976abd2de1/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:00ce1830d971f43b667abe4a56e42c1e2d594b32da4802e44a73bacacb25535f", size = 3043477, upload-time = "2025-10-30T02:55:11.182Z" },
{ url = "https://files.pythonhosted.org/packages/c8/31/36a1d8e702aa35c38fc117c2b8be3f182613faa25d794b8aeaab948d4c03/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cffe9d7697ae7456649617e8bb8d7a45afb71cd13f7ab22af3e5c61f04840908", size = 3345842, upload-time = "2025-10-10T11:11:45.366Z" },
{ url = "https://files.pythonhosted.org/packages/6e/b4/a5375cda5b54cb95ee9b836930fea30ae5a8f14aa97da7821722323d979b/psycopg2_binary-2.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:304fd7b7f97eef30e91b8f7e720b3db75fee010b520e434ea35ed1ff22501d03", size = 2713894, upload-time = "2025-10-10T11:11:48.775Z" },
{ url = "https://files.pythonhosted.org/packages/d8/91/f870a02f51be4a65987b45a7de4c2e1897dd0d01051e2b559a38fa634e3e/psycopg2_binary-2.9.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:be9b840ac0525a283a96b556616f5b4820e0526addb8dcf6525a0fa162730be4", size = 3756603, upload-time = "2025-10-10T11:11:52.213Z" },
@@ -6244,8 +6248,10 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2d/75/364847b879eb630b3ac8293798e380e441a957c53657995053c5ec39a316/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a", size = 4411159, upload-time = "2025-10-10T11:12:00.49Z" },
{ url = "https://files.pythonhosted.org/packages/6f/a0/567f7ea38b6e1c62aafd58375665a547c00c608a471620c0edc364733e13/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e", size = 4468234, upload-time = "2025-10-10T11:12:04.892Z" },
{ url = "https://files.pythonhosted.org/packages/30/da/4e42788fb811bbbfd7b7f045570c062f49e350e1d1f3df056c3fb5763353/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db", size = 4166236, upload-time = "2025-10-10T11:12:11.674Z" },
{ url = "https://files.pythonhosted.org/packages/3c/94/c1777c355bc560992af848d98216148be5f1be001af06e06fc49cbded578/psycopg2_binary-2.9.11-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757", size = 3983083, upload-time = "2025-10-30T02:55:15.73Z" },
{ url = "https://files.pythonhosted.org/packages/bd/42/c9a21edf0e3daa7825ed04a4a8588686c6c14904344344a039556d78aa58/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3", size = 3652281, upload-time = "2025-10-10T11:12:17.713Z" },
{ url = "https://files.pythonhosted.org/packages/12/22/dedfbcfa97917982301496b6b5e5e6c5531d1f35dd2b488b08d1ebc52482/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a", size = 3298010, upload-time = "2025-10-10T11:12:22.671Z" },
{ url = "https://files.pythonhosted.org/packages/66/ea/d3390e6696276078bd01b2ece417deac954dfdd552d2edc3d03204416c0c/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34", size = 3044641, upload-time = "2025-10-30T02:55:19.929Z" },
{ url = "https://files.pythonhosted.org/packages/12/9a/0402ded6cbd321da0c0ba7d34dc12b29b14f5764c2fc10750daa38e825fc/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d", size = 3347940, upload-time = "2025-10-10T11:12:26.529Z" },
{ url = "https://files.pythonhosted.org/packages/b1/d2/99b55e85832ccde77b211738ff3925a5d73ad183c0b37bcbbe5a8ff04978/psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:b33fabeb1fde21180479b2d4667e994de7bbf0eec22832ba5d9b5e4cf65b6c6d", size = 2714147, upload-time = "2025-10-10T11:12:29.535Z" },
{ url = "https://files.pythonhosted.org/packages/ff/a8/a2709681b3ac11b0b1786def10006b8995125ba268c9a54bea6f5ae8bd3e/psycopg2_binary-2.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b8fb3db325435d34235b044b199e56cdf9ff41223a4b9752e8576465170bb38c", size = 3756572, upload-time = "2025-10-10T11:12:32.873Z" },
@@ -6253,8 +6259,10 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/11/32/b2ffe8f3853c181e88f0a157c5fb4e383102238d73c52ac6d93a5c8bffe6/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c55b385daa2f92cb64b12ec4536c66954ac53654c7f15a203578da4e78105c0", size = 4411242, upload-time = "2025-10-10T11:12:42.388Z" },
{ url = "https://files.pythonhosted.org/packages/10/04/6ca7477e6160ae258dc96f67c371157776564679aefd247b66f4661501a2/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c0377174bf1dd416993d16edc15357f6eb17ac998244cca19bc67cdc0e2e5766", size = 4468258, upload-time = "2025-10-10T11:12:48.654Z" },
{ url = "https://files.pythonhosted.org/packages/3c/7e/6a1a38f86412df101435809f225d57c1a021307dd0689f7a5e7fe83588b1/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c6ff3335ce08c75afaed19e08699e8aacf95d4a260b495a4a8545244fe2ceb3", size = 4166295, upload-time = "2025-10-10T11:12:52.525Z" },
{ url = "https://files.pythonhosted.org/packages/f2/7d/c07374c501b45f3579a9eb761cbf2604ddef3d96ad48679112c2c5aa9c25/psycopg2_binary-2.9.11-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:84011ba3109e06ac412f95399b704d3d6950e386b7994475b231cf61eec2fc1f", size = 3983133, upload-time = "2025-10-30T02:55:24.329Z" },
{ url = "https://files.pythonhosted.org/packages/82/56/993b7104cb8345ad7d4516538ccf8f0d0ac640b1ebd8c754a7b024e76878/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba34475ceb08cccbdd98f6b46916917ae6eeb92b5ae111df10b544c3a4621dc4", size = 3652383, upload-time = "2025-10-10T11:12:56.387Z" },
{ url = "https://files.pythonhosted.org/packages/2d/ac/eaeb6029362fd8d454a27374d84c6866c82c33bfc24587b4face5a8e43ef/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b31e90fdd0f968c2de3b26ab014314fe814225b6c324f770952f7d38abf17e3c", size = 3298168, upload-time = "2025-10-10T11:13:00.403Z" },
{ url = "https://files.pythonhosted.org/packages/2b/39/50c3facc66bded9ada5cbc0de867499a703dc6bca6be03070b4e3b65da6c/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d526864e0f67f74937a8fce859bd56c979f5e2ec57ca7c627f5f1071ef7fee60", size = 3044712, upload-time = "2025-10-30T02:55:27.975Z" },
{ url = "https://files.pythonhosted.org/packages/9c/8e/b7de019a1f562f72ada81081a12823d3c1590bedc48d7d2559410a2763fe/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04195548662fa544626c8ea0f06561eb6203f1984ba5b4562764fbeb4c3d14b1", size = 3347549, upload-time = "2025-10-10T11:13:03.971Z" },
{ url = "https://files.pythonhosted.org/packages/80/2d/1bb683f64737bbb1f86c82b7359db1eb2be4e2c0c13b947f80efefa7d3e5/psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:efff12b432179443f54e230fdf60de1f6cc726b6c832db8701227d089310e8aa", size = 2714215, upload-time = "2025-10-10T11:13:07.14Z" },
]