Merge branch 'dev' into main
commit b2a25b2cd9
199 changed files with 9513 additions and 4545 deletions
@@ -3,7 +3,7 @@
language: en
early_access: false
enable_free_tier: true
reviews:
profile: chill
instructions: >-
# Code Review Instructions

@@ -118,10 +118,10 @@ reviews:
- E117
- D208
line_length: 100
dummy_variable_rgx: '^(_.*|junk|extra)$' # Variables starting with '_' or named 'junk' or 'extras', are considered dummy variables
markdownlint:
enabled: true
yamllint:
enabled: true
chat:
auto_reply: true
@@ -2,4 +2,8 @@
# Example:
# CORS_ALLOWED_ORIGINS="https://yourdomain.com,https://another.com"
# For local development, you might use:
# CORS_ALLOWED_ORIGINS="http://localhost:3000"

LLM_API_KEY="your-openai-api-key"
LLM_MODEL="openai/gpt-4o-mini"
LLM_PROVIDER="openai"

@@ -28,4 +28,4 @@ secret-scan:
- path: 'docker-compose.yml'
comment: 'Development docker compose with test credentials (neo4j/pleaseletmein, postgres cognee/cognee)'
- path: 'deployment/helm/docker-compose-helm.yml'
comment: 'Helm deployment docker compose with test postgres credentials (cognee/cognee)'
.github/ISSUE_TEMPLATE/bug_report.yml (16 changes, vendored)

@@ -8,7 +8,7 @@ body:
attributes:
value: |
Thanks for taking the time to fill out this bug report! Please provide a clear and detailed description.

- type: textarea
id: description
attributes:

@@ -17,7 +17,7 @@ body:
placeholder: Describe the bug in detail...
validations:
required: true

- type: textarea
id: reproduction
attributes:

@@ -29,7 +29,7 @@ body:
3. See error...
validations:
required: true

- type: textarea
id: expected
attributes:

@@ -38,7 +38,7 @@ body:
placeholder: Describe what you expected...
validations:
required: true

- type: textarea
id: actual
attributes:

@@ -47,7 +47,7 @@ body:
placeholder: Describe what actually happened...
validations:
required: true

- type: textarea
id: environment
attributes:

@@ -61,7 +61,7 @@ body:
- Database: [e.g. Neo4j]
validations:
required: true

- type: textarea
id: logs
attributes:

@@ -71,7 +71,7 @@ body:
render: shell
validations:
required: false

- type: textarea
id: additional
attributes:

@@ -80,7 +80,7 @@ body:
placeholder: Any additional information...
validations:
required: false

- type: checkboxes
id: checklist
attributes:
.github/ISSUE_TEMPLATE/documentation.yml (13 changes, vendored)

@@ -8,7 +8,7 @@ body:
attributes:
value: |
Thanks for helping improve our documentation! Please provide details about the documentation issue or improvement.

- type: dropdown
id: doc-type
attributes:

@@ -22,7 +22,7 @@ body:
- New documentation request
validations:
required: true

- type: textarea
id: location
attributes:

@@ -31,7 +31,7 @@ body:
placeholder: https://cognee.ai/docs/... or specific file/section
validations:
required: true

- type: textarea
id: issue
attributes:

@@ -40,7 +40,7 @@ body:
placeholder: The documentation is unclear about...
validations:
required: true

- type: textarea
id: suggestion
attributes:

@@ -49,7 +49,7 @@ body:
placeholder: I suggest changing this to...
validations:
required: false

- type: textarea
id: additional
attributes:

@@ -58,7 +58,7 @@ body:
placeholder: Additional context...
validations:
required: false

- type: checkboxes
id: checklist
attributes:

@@ -71,4 +71,3 @@ body:
required: true
- label: I have specified the location of the documentation issue
required: true
.github/ISSUE_TEMPLATE/feature_request.yml (15 changes, vendored)

@@ -8,7 +8,7 @@ body:
attributes:
value: |
Thanks for suggesting a new feature! Please provide a clear and detailed description of your idea.

- type: textarea
id: problem
attributes:

@@ -17,7 +17,7 @@ body:
placeholder: I'm always frustrated when...
validations:
required: true

- type: textarea
id: solution
attributes:

@@ -26,7 +26,7 @@ body:
placeholder: I would like to see...
validations:
required: true

- type: textarea
id: alternatives
attributes:

@@ -35,7 +35,7 @@ body:
placeholder: I have also considered...
validations:
required: false

- type: textarea
id: use-case
attributes:

@@ -44,7 +44,7 @@ body:
placeholder: This feature would help me...
validations:
required: true

- type: textarea
id: implementation
attributes:

@@ -53,7 +53,7 @@ body:
placeholder: This could be implemented by...
validations:
required: false

- type: textarea
id: additional
attributes:

@@ -62,7 +62,7 @@ body:
placeholder: Additional context...
validations:
required: false

- type: checkboxes
id: checklist
attributes:

@@ -75,4 +75,3 @@ body:
required: true
- label: I have described my specific use case
required: true
.github/actions/setup_neo4j/action.yml (8 changes, vendored)

@@ -34,14 +34,14 @@ runs:
-e NEO4J_apoc_export_file_enabled=true \
-e NEO4J_apoc_import_file_enabled=true \
neo4j:${{ inputs.neo4j-version }}

- name: Wait for Neo4j to be ready
shell: bash
run: |
echo "Waiting for Neo4j to start..."
timeout=60
counter=0

while [ $counter -lt $timeout ]; do
if docker exec neo4j-test cypher-shell -u neo4j -p "${{ inputs.neo4j-password }}" "RETURN 1" > /dev/null 2>&1; then
echo "Neo4j is ready!"

@@ -51,13 +51,13 @@ runs:
sleep 2
counter=$((counter + 2))
done

if [ $counter -ge $timeout ]; then
echo "Neo4j failed to start within $timeout seconds"
docker logs neo4j-test
exit 1
fi

- name: Verify GDS is available
shell: bash
run: |
.github/core-team.txt (2 changes, vendored)

@@ -8,5 +8,3 @@ lxobr
pazone
siillee
vasilije1990
|
|||
11
.github/pull_request_template.md
vendored
11
.github/pull_request_template.md
vendored
|
|
@ -10,26 +10,21 @@ DO NOT use AI-generated descriptions. We want to understand your thought process
|
|||
<!--
|
||||
* Key requirements to the new feature or modification;
|
||||
* Proof that the changes work and meet the requirements;
|
||||
* Include instructions on how to verify the changes. Describe how to test it locally;
|
||||
* Proof that it's sufficiently tested.
|
||||
-->
|
||||
|
||||
## Type of Change
|
||||
<!-- Please check the relevant option -->
|
||||
- [ ] Bug fix (non-breaking change that fixes an issue)
|
||||
- [ ] New feature (non-breaking change that adds functionality)
|
||||
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
|
||||
- [ ] Documentation update
|
||||
- [ ] Code refactoring
|
||||
- [ ] Performance improvement
|
||||
- [ ] Other (please specify):
|
||||
|
||||
## Screenshots/Videos (if applicable)
|
||||
<!-- Add screenshots or videos to help explain your changes -->
|
||||
## Screenshots
|
||||
<!-- ADD SCREENSHOT OF LOCAL TESTS PASSING-->
|
||||
|
||||
## Pre-submission Checklist
|
||||
<!-- Please check all boxes that apply before submitting your PR -->
|
||||
- [ ] **I have tested my changes thoroughly before submitting this PR**
|
||||
- [ ] **I have tested my changes thoroughly before submitting this PR** (See `CONTRIBUTING.md`)
|
||||
- [ ] **This PR contains minimal changes necessary to address the issue/feature**
|
||||
- [ ] My code follows the project's coding standards and style guidelines
|
||||
- [ ] I have added tests that prove my fix is effective or that my feature works
|
||||
|
|
|
.github/release-drafter.yml (2 changes, vendored)

@@ -3,7 +3,7 @@ tag-template: 'v$NEXT_PATCH_VERSION'

categories:
- title: 'Features'
labels: ['feature', 'enhancement']
- title: 'Bug Fixes'
labels: ['bug', 'fix']
- title: 'Maintenance'
.github/workflows/basic_tests.yml (37 changes, vendored)

@@ -34,43 +34,6 @@ env:
ENV: 'dev'

jobs:

lint:
name: Run Linting
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ inputs.python-version }}

- name: Run Linting
uses: astral-sh/ruff-action@v2

format-check:
name: Run Formatting Check
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ inputs.python-version }}

- name: Run Formatting Check
uses: astral-sh/ruff-action@v2
with:
args: "format --check"

unit-tests:
name: Run Unit Tests
runs-on: ubuntu-22.04
@@ -31,54 +31,54 @@ WORKFLOWS=(
for workflow in "${WORKFLOWS[@]}"; do
if [ -f "$workflow" ]; then
echo "Processing $workflow..."

# Create a backup
cp "$workflow" "${workflow}.bak"

# Check if the file begins with a workflow_call trigger
if grep -q "workflow_call:" "$workflow"; then
echo "$workflow already has workflow_call trigger, skipping..."
continue
fi

# Get the content after the 'on:' section
on_line=$(grep -n "^on:" "$workflow" | cut -d ':' -f1)

if [ -z "$on_line" ]; then
echo "Warning: No 'on:' section found in $workflow, skipping..."
continue
fi

# Create a new file with the modified content
{
# Copy the part before 'on:'
head -n $((on_line-1)) "$workflow"

# Add the new on: section that only includes workflow_call
echo "on:"
echo " workflow_call:"
echo " secrets:"
echo " inherit: true"

# Find where to continue after the original 'on:' section
next_section=$(awk "NR > $on_line && /^[a-z]/ {print NR; exit}" "$workflow")

if [ -z "$next_section" ]; then
next_section=$(wc -l < "$workflow")
next_section=$((next_section+1))
fi

# Copy the rest of the file starting from the next section
tail -n +$next_section "$workflow"
} > "${workflow}.new"

# Replace the original with the new version
mv "${workflow}.new" "$workflow"

echo "Modified $workflow to only run when called from test-suites.yml"
else
echo "Warning: $workflow not found, skipping..."
fi
done

echo "Finished modifying workflows!"
.github/workflows/dockerhub.yml (2 changes, vendored)

@@ -45,4 +45,4 @@ jobs:
cache-to: type=registry,ref=cognee/cognee:buildcache,mode=max

- name: Image digest
run: echo ${{ steps.build.outputs.digest }}
.github/workflows/label-core-team.yml (2 changes, vendored)

@@ -72,5 +72,3 @@ jobs:
} catch (error) {
core.warning(`Failed to add label: ${error.message}`);
}
.github/workflows/load_tests.yml (2 changes, vendored)

@@ -66,5 +66,3 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }}
run: uv run python ./cognee/tests/test_load.py
.github/workflows/pre_test.yml (7 changes, vendored)

@@ -5,7 +5,7 @@ permissions:
contents: read
jobs:
check-uv-lock:
-name: Validate uv lockfile and project metadata
+name: Lockfile and Pre-commit Hooks
runs-on: ubuntu-22.04
steps:
- name: Check out repository

@@ -17,6 +17,9 @@ jobs:
uses: astral-sh/setup-uv@v4
with:
enable-cache: true

- name: Validate uv lockfile and project metadata
run: uv lock --check || { echo "'uv lock --check' failed."; echo "Run 'uv lock' and push your changes."; exit 1; }

- name: Run pre-commit hooks
uses: pre-commit/action@v3.0.1
.github/workflows/release.yml (26 changes, vendored)

@@ -42,10 +42,10 @@ jobs:
echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

git tag "${TAG}"
git push origin "${TAG}"

- name: Create GitHub Release
uses: softprops/action-gh-release@v2

@@ -54,8 +54,8 @@ jobs:
generate_release_notes: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

release-pypi-package:
needs: release-github
name: Release PyPI Package from ${{ inputs.flavour }}
permissions:

@@ -67,25 +67,25 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ inputs.flavour }}

- name: Install uv
uses: astral-sh/setup-uv@v7

- name: Install Python
run: uv python install

- name: Install dependencies
run: uv sync --locked --all-extras

- name: Build distributions
run: uv build

- name: Publish ${{ inputs.flavour }} release to PyPI
env:
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: uv publish

release-docker-image:
needs: release-github
name: Release Docker Image from ${{ inputs.flavour }}
permissions:

@@ -128,7 +128,7 @@ jobs:
context: .
platforms: linux/amd64,linux/arm64
push: true
tags: |
cognee/cognee:${{ needs.release-github.outputs.version }}
cognee/cognee:latest
labels: |

@@ -163,4 +163,4 @@ jobs:
-H "Authorization: Bearer ${{ secrets.REPO_DISPATCH_PAT_TOKEN }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/topoteretes/cognee-community/dispatches \
-d '{"event_type":"new-main-release","client_payload":{"caller_repo":"'"${GITHUB_REPOSITORY}"'"}}'
.github/workflows/release_test.yml (1 change, vendored)

@@ -15,4 +15,3 @@ jobs:
name: Load Tests
uses: ./.github/workflows/load_tests.yml
secrets: inherit
@@ -10,7 +10,7 @@ on:
required: false
type: string
default: '["3.10.x", "3.12.x", "3.13.x"]'
os:
required: false
type: string
default: '["ubuntu-22.04", "macos-15", "windows-latest"]'
.github/workflows/test_llms.yml (2 changes, vendored)

@@ -173,4 +173,4 @@ jobs:
EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0"
EMBEDDING_DIMENSIONS: "1024"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py
.github/workflows/test_suites.yml (4 changes, vendored)

@@ -18,11 +18,11 @@ env:
RUNTIME__LOG_LEVEL: ERROR
ENV: 'dev'

jobs:
pre-test:
name: basic checks
uses: ./.github/workflows/pre_test.yml

basic-tests:
name: Basic Tests
uses: ./.github/workflows/basic_tests.yml
.gitignore (2 changes, vendored)

@@ -147,6 +147,8 @@ venv/
ENV/
env.bak/
venv.bak/
mise.toml
deployment/helm/values-local.yml

# Spyder project settings
.spyderproject
@@ -6,4 +6,4 @@ pull_request_rules:
actions:
backport:
branches:
- main
@@ -7,6 +7,7 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
exclude: ^deployment/helm/templates/
- id: check-added-large-files
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
@@ -128,5 +128,3 @@ MCP server and Frontend:
## CI Mirrors Local Commands

Our GitHub Actions run the same ruff checks and pytest suites shown above (`.github/workflows/basic_tests.yml` and related workflows). Use the commands in this document locally to minimize CI surprises.
CLAUDE.md (590 additions, new file)

@@ -0,0 +1,590 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Cognee is an open-source AI memory platform that transforms raw data into persistent knowledge graphs for AI agents. It replaces traditional RAG (Retrieval-Augmented Generation) with an ECL (Extract, Cognify, Load) pipeline combining vector search, graph databases, and LLM-powered entity extraction.

**Requirements**: Python 3.9 - 3.12

## Development Commands

### Setup
```bash
# Create virtual environment (recommended: uv)
uv venv && source .venv/bin/activate

# Install with pip, poetry, or uv
uv pip install -e .

# Install with dev dependencies
uv pip install -e ".[dev]"

# Install with specific extras
uv pip install -e ".[postgres,neo4j,docs,chromadb]"

# Set up pre-commit hooks
pre-commit install
```

### Available Installation Extras
- **postgres** / **postgres-binary** - PostgreSQL + PGVector support
- **neo4j** - Neo4j graph database support
- **neptune** - AWS Neptune support
- **chromadb** - ChromaDB vector database
- **docs** - Document processing (unstructured library)
- **scraping** - Web scraping (Tavily, BeautifulSoup, Playwright)
- **langchain** - LangChain integration
- **llama-index** - LlamaIndex integration
- **anthropic** - Anthropic Claude models
- **gemini** - Google Gemini models
- **ollama** - Ollama local models
- **mistral** - Mistral AI models
- **groq** - Groq API support
- **llama-cpp** - Llama.cpp local inference
- **huggingface** - HuggingFace transformers
- **aws** - S3 storage backend
- **redis** - Redis caching
- **graphiti** - Graphiti-core integration
- **baml** - BAML structured output
- **dlt** - Data load tool (dlt) integration
- **docling** - Docling document processing
- **codegraph** - Code graph extraction
- **evals** - Evaluation tools
- **deepeval** - DeepEval testing framework
- **posthog** - PostHog analytics
- **monitoring** - Sentry + Langfuse observability
- **distributed** - Modal distributed execution
- **dev** - All development tools (pytest, mypy, ruff, etc.)
- **debug** - Debugpy for debugging

### Testing
```bash
# Run all tests
pytest

# Run with coverage
pytest --cov=cognee --cov-report=html

# Run specific test file
pytest cognee/tests/test_custom_model.py

# Run specific test function
pytest cognee/tests/test_custom_model.py::test_function_name

# Run async tests
pytest -v cognee/tests/integration/

# Run unit tests only
pytest cognee/tests/unit/

# Run integration tests only
pytest cognee/tests/integration/
```

### Code Quality
```bash
# Run ruff linter
ruff check .

# Run ruff formatter
ruff format .

# Run both linting and formatting (pre-commit)
pre-commit run --all-files

# Type checking with mypy
mypy cognee/

# Run pylint
pylint cognee/
```

### Running Cognee
```bash
# Using Python SDK
python examples/python/simple_example.py

# Using CLI
cognee-cli add "Your text here"
cognee-cli cognify
cognee-cli search "Your query"
cognee-cli delete --all

# Launch full stack with UI
cognee-cli -ui
```

## Architecture Overview

### Core Workflow: add → cognify → search/memify

1. **add()** - Ingest data (files, URLs, text) into datasets
2. **cognify()** - Extract entities/relationships and build knowledge graph
3. **search()** - Query knowledge using various retrieval strategies
4. **memify()** - Enrich graph with additional context and rules
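A minimal end-to-end sketch of this flow (assumes the default dataset and the OpenAI defaults described under Important Configuration below):

```python
import asyncio

import cognee


async def main():
    await cognee.add("Cognee turns documents into AI memory.")  # 1. ingest
    await cognee.cognify()  # 2. build the knowledge graph
    results = await cognee.search("What does Cognee do?")  # 3. query it
    print(results)


asyncio.run(main())
```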
### Key Architectural Patterns

#### 1. Pipeline-Based Processing
All data flows through task-based pipelines (`cognee/modules/pipelines/`). Tasks are composable units that can run sequentially or in parallel. Example pipeline tasks: `classify_documents`, `extract_graph_from_data`, `add_data_points`.
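As a sketch (the task names here are illustrative, not taken from the codebase), two async functions wrapped as composable tasks might look like:

```python
from cognee.modules.pipelines.tasks.Task import Task  # import path shown in "Common Patterns" below


async def normalize_text(documents):
    # Illustrative task: strip whitespace from raw document strings
    return [doc.strip() for doc in documents]


async def tag_documents(documents):
    # Illustrative task: attach a simple tag to each document
    return [{"text": doc, "tag": "note"} for doc in documents]


# Tasks run in order; each task's output is handed to the next one
tasks = [Task(normalize_text), Task(tag_documents)]
```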
#### 2. Interface-Based Database Adapters
Multiple backends are supported through adapter interfaces:
- **Graph**: Kuzu (default), Neo4j, Neptune via `GraphDBInterface`
- **Vector**: LanceDB (default), ChromaDB, PGVector via `VectorDBInterface`
- **Relational**: SQLite (default), PostgreSQL

Key files:
- `cognee/infrastructure/databases/graph/graph_db_interface.py`
- `cognee/infrastructure/databases/vector/vector_db_interface.py`

#### 3. Multi-Tenant Access Control
User → Dataset → Data hierarchy with permission-based filtering. Enable with `ENABLE_BACKEND_ACCESS_CONTROL=True`. Each user+dataset combination can have isolated graph/vector databases (when using supported backends: Kuzu, LanceDB, SQLite, Postgres).

### Layer Structure

```
API Layer (cognee/api/v1/)
↓
Main Functions (add, cognify, search, memify)
↓
Pipeline Orchestrator (cognee/modules/pipelines/)
↓
Task Execution Layer (cognee/tasks/)
↓
Domain Modules (graph, retrieval, ingestion, etc.)
↓
Infrastructure Adapters (LLM, databases)
↓
External Services (OpenAI, Kuzu, LanceDB, etc.)
```

### Critical Data Flow Paths

#### ADD: Data Ingestion
`add()` → `resolve_data_directories` → `ingest_data` → `save_data_item_to_storage` → Create Dataset + Data records in relational DB

Key files: `cognee/api/v1/add/add.py`, `cognee/tasks/ingestion/ingest_data.py`

#### COGNIFY: Knowledge Graph Construction
`cognify()` → `classify_documents` → `extract_chunks_from_documents` → `extract_graph_from_data` (LLM extracts entities/relationships using Instructor) → `summarize_text` → `add_data_points` (store in graph + vector DBs)

Key files:
- `cognee/api/v1/cognify/cognify.py`
- `cognee/tasks/graph/extract_graph_from_data.py`
- `cognee/tasks/storage/add_data_points.py`

#### SEARCH: Retrieval
`search(query_text, query_type)` → route to retriever type → filter by permissions → return results

Available search types (from `cognee/modules/search/types/SearchType.py`):
- **GRAPH_COMPLETION** (default) - Graph traversal + LLM completion
- **GRAPH_SUMMARY_COMPLETION** - Uses pre-computed summaries with graph context
- **GRAPH_COMPLETION_COT** - Chain-of-thought reasoning over graph
- **GRAPH_COMPLETION_CONTEXT_EXTENSION** - Extended context graph retrieval
- **TRIPLET_COMPLETION** - Triplet-based (subject-predicate-object) search
- **RAG_COMPLETION** - Traditional RAG with chunks
- **CHUNKS** - Vector similarity search over chunks
- **CHUNKS_LEXICAL** - Lexical (keyword) search over chunks
- **SUMMARIES** - Search pre-computed document summaries
- **CYPHER** - Direct Cypher query execution (requires `ALLOW_CYPHER_QUERY=True`)
- **NATURAL_LANGUAGE** - Natural language to structured query
- **TEMPORAL** - Time-aware graph search
- **FEELING_LUCKY** - Automatic search type selection
- **FEEDBACK** - User feedback-based refinement
- **CODING_RULES** - Code-specific search rules

Key files:
- `cognee/api/v1/search/search.py`
- `cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py`
- `cognee/modules/search/types/SearchType.py`
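A hedged sketch of selecting a non-default search type (the `SearchType` import path is assumed from the module listed above; adjust it if the enum is re-exported elsewhere):

```python
import asyncio

import cognee
from cognee.modules.search.types import SearchType  # assumed import path, see list above


async def main():
    results = await cognee.search(
        query_text="Which documents mention knowledge graphs?",
        query_type=SearchType.CHUNKS,  # plain vector similarity over chunks
    )
    for result in results:
        print(result)


asyncio.run(main())
```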
### Core Data Models

#### Engine Models (`cognee/infrastructure/engine/models/`)
- **DataPoint** - Base class for all graph nodes (versioned, with metadata)
- **Edge** - Graph relationships (source, target, relationship type)
- **Triplet** - (Subject, Predicate, Object) representation

#### Graph Models (`cognee/shared/data_models.py`)
- **KnowledgeGraph** - Container for nodes and edges
- **Node** - Entity (id, name, type, description)
- **Edge** - Relationship (source_node_id, target_node_id, relationship_name)

### Key Infrastructure Components

#### LLM Gateway (`cognee/infrastructure/llm/LLMGateway.py`)
Unified interface for multiple LLM providers: OpenAI, Anthropic, Gemini, Ollama, Mistral, Bedrock. Uses Instructor for structured output extraction.

#### Embedding Engines
Factory pattern for embeddings: `cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py`

#### Document Loaders
Support for PDF, DOCX, CSV, images, audio, code files in `cognee/infrastructure/files/`

## Important Configuration

### Environment Setup
Copy `.env.template` to `.env` and configure:

```bash
# Minimal setup (defaults to OpenAI + local file-based databases)
LLM_API_KEY="your_openai_api_key"
LLM_MODEL="openai/gpt-4o-mini" # Default model
```

**Important**: If you configure only LLM or only embeddings, the other defaults to OpenAI. Ensure you have a working OpenAI API key, or configure both to avoid unexpected defaults.

Default databases (no extra setup needed):
- **Relational**: SQLite (metadata and state storage)
- **Vector**: LanceDB (embeddings for semantic search)
- **Graph**: Kuzu (knowledge graph and relationships)

All stored in `.venv` by default. Override with `DATA_ROOT_DIRECTORY` and `SYSTEM_ROOT_DIRECTORY`.

### Switching Databases

#### Relational Databases
```bash
# PostgreSQL (requires postgres extra: pip install cognee[postgres])
DB_PROVIDER=postgres
DB_HOST=localhost
DB_PORT=5432
DB_USERNAME=cognee
DB_PASSWORD=cognee
DB_NAME=cognee_db
```

#### Vector Databases
Supported: lancedb (default), pgvector, chromadb, qdrant, weaviate, milvus
```bash
# ChromaDB (requires chromadb extra)
VECTOR_DB_PROVIDER=chromadb

# PGVector (requires postgres extra)
VECTOR_DB_PROVIDER=pgvector
VECTOR_DB_URL=postgresql://cognee:cognee@localhost:5432/cognee_db
```

#### Graph Databases
Supported: kuzu (default), neo4j, neptune, kuzu-remote
```bash
# Neo4j (requires neo4j extra: pip install cognee[neo4j])
GRAPH_DATABASE_PROVIDER=neo4j
GRAPH_DATABASE_URL=bolt://localhost:7687
GRAPH_DATABASE_NAME=neo4j
GRAPH_DATABASE_USERNAME=neo4j
GRAPH_DATABASE_PASSWORD=yourpassword

# Remote Kuzu
GRAPH_DATABASE_PROVIDER=kuzu-remote
GRAPH_DATABASE_URL=http://localhost:8000
GRAPH_DATABASE_USERNAME=your_username
GRAPH_DATABASE_PASSWORD=your_password
```

### LLM Provider Configuration

Supported providers: OpenAI (default), Azure OpenAI, Google Gemini, Anthropic, AWS Bedrock, Ollama, LM Studio, Custom (OpenAI-compatible APIs)

#### OpenAI (Recommended - Minimal Setup)
```bash
LLM_API_KEY="your_openai_api_key"
LLM_MODEL="openai/gpt-4o-mini" # or gpt-4o, gpt-4-turbo, etc.
LLM_PROVIDER="openai"
```

#### Azure OpenAI
```bash
LLM_PROVIDER="azure"
LLM_MODEL="azure/gpt-4o-mini"
LLM_ENDPOINT="https://YOUR-RESOURCE.openai.azure.com/openai/deployments/gpt-4o-mini"
LLM_API_KEY="your_azure_api_key"
LLM_API_VERSION="2024-12-01-preview"
```

#### Google Gemini (requires gemini extra)
```bash
LLM_PROVIDER="gemini"
LLM_MODEL="gemini/gemini-2.0-flash-exp"
LLM_API_KEY="your_gemini_api_key"
```

#### Anthropic Claude (requires anthropic extra)
```bash
LLM_PROVIDER="anthropic"
LLM_MODEL="claude-3-5-sonnet-20241022"
LLM_API_KEY="your_anthropic_api_key"
```

#### Ollama (Local - requires ollama extra)
```bash
LLM_PROVIDER="ollama"
LLM_MODEL="llama3.1:8b"
LLM_ENDPOINT="http://localhost:11434/v1"
LLM_API_KEY="ollama"
EMBEDDING_PROVIDER="ollama"
EMBEDDING_MODEL="nomic-embed-text:latest"
EMBEDDING_ENDPOINT="http://localhost:11434/api/embed"
HUGGINGFACE_TOKENIZER="nomic-ai/nomic-embed-text-v1.5"
```

#### Custom / OpenRouter / vLLM
```bash
LLM_PROVIDER="custom"
LLM_MODEL="openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
LLM_ENDPOINT="https://openrouter.ai/api/v1"
LLM_API_KEY="your_api_key"
```

#### AWS Bedrock (requires aws extra)
```bash
LLM_PROVIDER="bedrock"
LLM_MODEL="anthropic.claude-3-sonnet-20240229-v1:0"
AWS_REGION="us-east-1"
AWS_ACCESS_KEY_ID="your_access_key"
AWS_SECRET_ACCESS_KEY="your_secret_key"
# Optional for temporary credentials:
# AWS_SESSION_TOKEN="your_session_token"
```

#### LLM Rate Limiting
```bash
LLM_RATE_LIMIT_ENABLED=true
LLM_RATE_LIMIT_REQUESTS=60 # Requests per interval
LLM_RATE_LIMIT_INTERVAL=60 # Interval in seconds
```

#### Instructor Mode (Structured Output)
```bash
# LLM_INSTRUCTOR_MODE controls how structured data is extracted
# Each LLM has its own default (e.g., gpt-4o models use "json_schema_mode")
# Override if needed:
LLM_INSTRUCTOR_MODE="json_schema_mode" # or "tool_call", "md_json", etc.
```

### Structured Output Framework
```bash
# Use Instructor (default, via litellm)
STRUCTURED_OUTPUT_FRAMEWORK="instructor"

# Or use BAML (requires baml extra: pip install cognee[baml])
STRUCTURED_OUTPUT_FRAMEWORK="baml"
BAML_LLM_PROVIDER=openai
BAML_LLM_MODEL="gpt-4o-mini"
BAML_LLM_API_KEY="your_api_key"
```

### Storage Backend
```bash
# Local filesystem (default)
STORAGE_BACKEND="local"

# S3 (requires aws extra: pip install cognee[aws])
STORAGE_BACKEND="s3"
STORAGE_BUCKET_NAME="your-bucket-name"
AWS_REGION="us-east-1"
AWS_ACCESS_KEY_ID="your_access_key"
AWS_SECRET_ACCESS_KEY="your_secret_key"
DATA_ROOT_DIRECTORY="s3://your-bucket/cognee/data"
SYSTEM_ROOT_DIRECTORY="s3://your-bucket/cognee/system"
```

## Extension Points

### Adding New Functionality

1. **New Task Type**: Create task function in `cognee/tasks/`, return Task object, register in pipeline
2. **New Database Backend**: Implement `GraphDBInterface` or `VectorDBInterface` in `cognee/infrastructure/databases/`
3. **New LLM Provider**: Add configuration in LLM config (uses litellm)
4. **New Document Processor**: Extend loaders in `cognee/modules/data/processing/`
5. **New Search Type**: Add to `SearchType` enum and implement retriever in `cognee/modules/retrieval/`
6. **Custom Graph Models**: Define Pydantic models extending `DataPoint` in your code (see the sketch below)
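For item 6, a minimal sketch of a custom graph model, assuming `DataPoint` is importable from the engine package listed under Core Data Models (the class and fields here are illustrative):

```python
from cognee.infrastructure.engine import DataPoint  # assumed re-export of the engine models above


class Person(DataPoint):
    """Illustrative custom node type; fields become node properties in the graph."""

    name: str
    role: str
```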
### Working with Ontologies
Cognee supports ontology-based entity extraction to ground knowledge graphs in standardized semantic frameworks (e.g., OWL ontologies).

Configuration:
```bash
ONTOLOGY_RESOLVER=rdflib # Default: uses rdflib and OWL files
MATCHING_STRATEGY=fuzzy # Default: fuzzy matching with 80% similarity
ONTOLOGY_FILE_PATH=/path/to/your/ontology.owl # Full path to ontology file
```

Implementation: `cognee/modules/ontology/`

## Branching Strategy

**IMPORTANT**: Always branch from `dev`, not `main`. The `dev` branch is the active development branch.

```bash
git checkout dev
git pull origin dev
git checkout -b feature/your-feature-name
```

## Code Style

- **Formatter**: Ruff (configured in `pyproject.toml`)
- **Line length**: 100 characters
- **String quotes**: Use double quotes `"` not single quotes `'` (enforced by ruff-format)
- **Pre-commit hooks**: Run ruff linting and formatting automatically
- **Type hints**: Encouraged (mypy checks enabled)
- **Important**: Always run `pre-commit run --all-files` before committing to catch formatting issues

## Testing Strategy

Tests are organized in `cognee/tests/`:
- `unit/` - Unit tests for individual modules
- `integration/` - Full pipeline integration tests
- `cli_tests/` - CLI command tests
- `tasks/` - Task-specific tests

When adding features, add corresponding tests. Integration tests should cover the full add → cognify → search flow.

## API Structure

FastAPI application with versioned routes under `cognee/api/v1/`:
- `/add` - Data ingestion
- `/cognify` - Knowledge graph processing
- `/search` - Query interface
- `/memify` - Graph enrichment
- `/datasets` - Dataset management
- `/users` - Authentication (if `REQUIRE_AUTHENTICATION=True`)
- `/visualize` - Graph visualization server

## Python SDK Entry Points

Main functions exported from `cognee/__init__.py`:
- `add(data, dataset_name)` - Ingest data
- `cognify(datasets)` - Build knowledge graph
- `search(query_text, query_type)` - Query knowledge
- `memify(extraction_tasks, enrichment_tasks)` - Enrich graph
- `delete(data_id)` - Remove data
- `config()` - Configuration management
- `datasets()` - Dataset operations

All functions are async - use `await` or `asyncio.run()`.
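For example, from a synchronous script:

```python
import asyncio

import cognee

# Each SDK entry point is a coroutine, so wrap calls with asyncio.run()
asyncio.run(cognee.add("Graphs connect entities.", dataset_name="my_project"))
asyncio.run(cognee.cognify(datasets=["my_project"]))
```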
## Security Considerations

Several security environment variables in `.env`:
- `ACCEPT_LOCAL_FILE_PATH` - Allow local file paths (default: True)
- `ALLOW_HTTP_REQUESTS` - Allow HTTP requests from Cognee (default: True)
- `ALLOW_CYPHER_QUERY` - Allow raw Cypher queries (default: True)
- `REQUIRE_AUTHENTICATION` - Enable API authentication (default: False)
- `ENABLE_BACKEND_ACCESS_CONTROL` - Multi-tenant isolation (default: True)

For production deployments, review and tighten these settings.

## Common Patterns

### Creating a Custom Pipeline Task
```python
from cognee.modules.pipelines.tasks.Task import Task

async def my_custom_task(data):
    # Your logic here
    processed_data = process(data)
    return processed_data

# Use in pipeline
task = Task(my_custom_task)
```

### Accessing Databases Directly
```python
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.vector import get_vector_engine

graph_engine = await get_graph_engine()
vector_engine = await get_vector_engine()
```

### Using LLM Gateway
```python
from cognee.infrastructure.llm.get_llm_client import get_llm_client

llm_client = get_llm_client()
response = await llm_client.acreate_structured_output(
    text_input="Your prompt",
    system_prompt="System instructions",
    response_model=YourPydanticModel
)
```

## Key Concepts

### Datasets
Datasets are project-level containers that support organization, permissions, and isolated processing workflows. Each user can have multiple datasets with different access permissions.

```python
# Create/use a dataset
await cognee.add(data, dataset_name="my_project")
await cognee.cognify(datasets=["my_project"])
```

### DataPoints
Atomic knowledge units that form the foundation of graph structures. All graph nodes extend the `DataPoint` base class with versioning and metadata support.

### Permissions System
Multi-tenant architecture with users, roles, and Access Control Lists (ACLs):
- Read, write, delete, and share permissions per dataset
- Enable with `ENABLE_BACKEND_ACCESS_CONTROL=True`
- Supports isolated databases per user+dataset (Kuzu, LanceDB, SQLite, Postgres)

### Graph Visualization
Launch visualization server:
```bash
# Via CLI
cognee-cli -ui # Launches full stack with UI at http://localhost:3000

# Via Python
from cognee.api.v1.visualize import start_visualization_server
await start_visualization_server(port=8080)
```

## Debugging & Troubleshooting

### Debug Configuration
- Set `LITELLM_LOG="DEBUG"` for verbose LLM logs (default: "ERROR")
- Enable debug mode: `ENV="development"` or `ENV="debug"`
- Disable telemetry: `TELEMETRY_DISABLED=1`
- Check logs in structured format (uses structlog)
- Use `debugpy` optional dependency for debugging: `pip install cognee[debug]`

### Common Issues

**Ollama + OpenAI Embeddings NoDataError**
- Issue: Mixing Ollama with OpenAI embeddings can cause errors
- Solution: Configure both LLM and embeddings to use the same provider, or ensure `HUGGINGFACE_TOKENIZER` is set when using Ollama

**LM Studio Structured Output**
- Issue: LM Studio requires explicit instructor mode
- Solution: Set `LLM_INSTRUCTOR_MODE="json_schema_mode"` (or appropriate mode)

**Default Provider Fallback**
- Issue: Configuring only LLM or only embeddings defaults the other to OpenAI
- Solution: Always configure both LLM and embedding providers, or ensure valid OpenAI API key

**Permission Denied on Search**
- Behavior: Returns empty list rather than error (prevents information leakage)
- Solution: Check dataset permissions and user access rights

**Database Connection Issues**
- Check: Verify database URLs, credentials, and that services are running
- Docker users: Use `DB_HOST=host.docker.internal` for local databases

**Rate Limiting Errors**
- Enable client-side rate limiting: `LLM_RATE_LIMIT_ENABLED=true`
- Adjust limits: `LLM_RATE_LIMIT_REQUESTS` and `LLM_RATE_LIMIT_INTERVAL`

## Resources

- [Documentation](https://docs.cognee.ai/)
- [Discord Community](https://discord.gg/NQPKmU5CCg)
- [GitHub Issues](https://github.com/topoteretes/cognee/issues)
- [Example Notebooks](examples/python/)
- [Research Paper](https://arxiv.org/abs/2505.24478) - Optimizing knowledge graphs for LLM reasoning
@@ -1,16 +1,16 @@
> [!IMPORTANT]
> **Note for contributors:** When branching out, create a new branch from the `dev` branch.

# 🎉 Welcome to **cognee**!

We're excited that you're interested in contributing to our project!
We want to ensure that every user and contributor feels welcome, included and supported to participate in cognee community.
This guide will help you get started and ensure your contributions can be efficiently integrated into the project.

## 🌟 Quick Links

- [Code of Conduct](CODE_OF_CONDUCT.md)
- [Discord Community](https://discord.gg/bcy8xFAtfd)
- [Issue Tracker](https://github.com/topoteretes/cognee/issues)
- [Cognee Docs](https://docs.cognee.ai)

@@ -62,6 +62,11 @@ Looking for a place to start? Try filtering for [good first issues](https://gith

## 2. 🛠️ Development Setup

### Required tools
* [Python](https://www.python.org/downloads/)
* [uv](https://docs.astral.sh/uv/getting-started/installation/)
* pre-commit: `uv run pip install pre-commit && pre-commit install`

### Fork and Clone

1. Fork the [**cognee**](https://github.com/topoteretes/cognee) repository

@@ -71,7 +76,7 @@ git clone https://github.com/<your-github-username>/cognee.git
cd cognee
```
In case you are working on Vector and Graph Adapters
-1. Fork the [**cognee**](https://github.com/topoteretes/cognee-community) repository
+1. Fork the [**cognee-community**](https://github.com/topoteretes/cognee-community) repository
2. Clone your fork:
```shell
git clone https://github.com/<your-github-username>/cognee-community.git

@@ -93,14 +98,46 @@ git checkout -b feature/your-feature-name
4. **Commits**: Write clear commit messages

### Running Tests

Rename `.env.example` into `.env` and provide your OPENAI_API_KEY as LLM_API_KEY

```shell
-python cognee/cognee/tests/test_library.py
+uv run python cognee/tests/test_library.py
```

### Running Simple Example

Rename `.env.example` into `.env` and provide your OPENAI_API_KEY as LLM_API_KEY

Make sure to run ```shell uv sync ``` in the root cloned folder or set up a virtual environment to run cognee

```shell
python examples/python/simple_example.py
```
or

```shell
uv run python examples/python/simple_example.py
```

### Running Simple Example

Change .env.example into .env and provide your OPENAI_API_KEY as LLM_API_KEY

Make sure to run ```shell uv sync ``` in the root cloned folder or set up a virtual environment to run cognee

```shell
python cognee/cognee/examples/python/simple_example.py
```
or

```shell
uv run python cognee/cognee/examples/python/simple_example.py
```

## 4. 📤 Submitting Changes

-1. Install ruff on your system
-2. Run ```ruff format .``` and ``` ruff check ``` and fix the issues
+1. Make sure that `pre-commit` and hooks are installed. See `Required tools` section for more information. Try executing `pre-commit run` if you are not sure.
3. Push your changes:
```shell
git add .
@@ -32,7 +32,7 @@ COPY README.md pyproject.toml uv.lock entrypoint.sh ./

# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
-uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
+uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --extra chromadb --frozen --no-install-project --no-dev --no-editable

# Copy Alembic configuration
COPY alembic.ini /app/alembic.ini

@@ -43,7 +43,7 @@ COPY alembic/ /app/alembic
COPY ./cognee /app/cognee
COPY ./distributed /app/distributed
RUN --mount=type=cache,target=/root/.cache/uv \
-uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
+uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --extra chromadb --frozen --no-dev --no-editable

FROM python:3.12-slim-bookworm
README.md (15 changes)

@@ -65,12 +65,12 @@ Use your data to build personalized and dynamic memory for AI Agents. Cognee let

## About Cognee

Cognee is an open-source tool and platform that transforms your raw data into persistent and dynamic AI memory for Agents. It combines vector search with graph databases to make your documents both searchable by meaning and connected by relationships.

You can use Cognee in two ways:

1. [Self-host Cognee Open Source](https://docs.cognee.ai/getting-started/installation), which stores all data locally by default.
2. [Connect to Cognee Cloud](https://platform.cognee.ai/), and get the same OSS stack on managed infrastructure for easier development and productionization.

### Cognee Open Source (self-hosted):

@@ -81,8 +81,8 @@ You can use Cognee in two ways:
- Offers high customizability through user-defined tasks, modular pipelines, and built-in search endpoints

### Cognee Cloud (managed):
- Hosted web UI dashboard
- Automatic version updates
- Resource usage analytics
- GDPR compliant, enterprise-grade security

@@ -119,13 +119,14 @@ To integrate other LLM providers, see our [LLM Provider Documentation](https://d

### Step 3: Run the Pipeline

Cognee will take your documents, generate a knowledge graph from them and then query the graph based on combined relationships.

Now, run a minimal pipeline:

```python
import cognee
import asyncio
from pprint import pprint


async def main():

@@ -143,7 +144,7 @@ async def main():

# Display the results
for result in results:
-    print(result)
+    pprint(result)


if __name__ == '__main__':

@@ -157,7 +158,7 @@ As you can see, the output is generated from the document we previously stored i
Cognee turns documents into AI memory.
```

### Use the Cognee CLI

As an alternative, you can get started with these essential commands:
@@ -1 +1 @@
Generic single-database configuration with an async dbapi.
@@ -0,0 +1,52 @@
"""Enable delete for old tutorial notebooks

Revision ID: 1a58b986e6e1
Revises: 46a6ce2bd2b2
Create Date: 2025-12-17 11:04:44.414259

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "1a58b986e6e1"
down_revision: Union[str, None] = "e1ec1dcb50b6"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def change_tutorial_deletable_flag(deletable: bool) -> None:
    bind = op.get_bind()
    inspector = sa.inspect(bind)

    if "notebooks" not in inspector.get_table_names():
        return

    columns = {col["name"] for col in inspector.get_columns("notebooks")}
    required_columns = {"name", "deletable"}
    if not required_columns.issubset(columns):
        return

    notebooks = sa.table(
        "notebooks",
        sa.Column("name", sa.String()),
        sa.Column("deletable", sa.Boolean()),
    )

    tutorial_name = "Python Development with Cognee Tutorial 🧠"

    bind.execute(
        notebooks.update().where(notebooks.c.name == tutorial_name).values(deletable=deletable)
    )


def upgrade() -> None:
    change_tutorial_deletable_flag(True)


def downgrade() -> None:
    change_tutorial_deletable_flag(False)
@@ -43,10 +43,10 @@ Saiba mais sobre os [casos de uso](https://docs.cognee.ai/use-cases) e [avaliaç

## Funcionalidades

- Conecte e recupere suas conversas passadas, documentos, imagens e transcrições de áudio
- Reduza alucinações, esforço de desenvolvimento e custos
- Carregue dados em bancos de dados de grafos e vetores usando apenas Pydantic
- Transforme e organize seus dados enquanto os coleta de mais de 30 fontes diferentes

## Primeiros Passos
@@ -108,7 +108,7 @@ if __name__ == '__main__':
Exemplo do output:
```
O Processamento de Linguagem Natural (NLP) é um campo interdisciplinar e transdisciplinar que envolve ciência da computação e recuperação de informações. Ele se concentra na interação entre computadores e a linguagem humana, permitindo que as máquinas compreendam e processem a linguagem natural.

```

Visualização do grafo:
@@ -141,7 +141,7 @@ if __name__ == '__main__':
2. Простая демонстрация GraphRAG
[Видео](https://github.com/user-attachments/assets/d80b0776-4eb9-4b8e-aa22-3691e2d44b8f)

3. Cognee с Ollama
[Видео](https://github.com/user-attachments/assets/8621d3e8-ecb8-4860-afb2-5594f2ee17db)

## Правила поведения
@@ -114,7 +114,7 @@ if __name__ == '__main__':
示例输出:
```
自然语言处理(NLP)是计算机科学和信息检索的跨学科领域。它关注计算机和人类语言之间的交互,使机器能够理解和处理自然语言。

```
图形可视化:
<a href="https://rawcdn.githack.com/topoteretes/cognee/refs/heads/main/assets/graph_visualization.html"><img src="https://rawcdn.githack.com/topoteretes/cognee/refs/heads/main/assets/graph_visualization.png" width="100%" alt="图形可视化"></a>
cognee-frontend/package-lock.json (3814 changes, generated) — file diff suppressed because it is too large

@@ -9,14 +9,15 @@
"lint": "next lint"
},
"dependencies": {
-"@auth0/nextjs-auth0": "^4.13.1",
+"@auth0/nextjs-auth0": "^4.14.0",
"classnames": "^2.5.1",
"culori": "^4.0.1",
"d3-force-3d": "^3.0.6",
-"next": "16.0.4",
-"react": "^19.2.0",
-"react-dom": "^19.2.0",
+"next": "^16.1.1",
+"react": "^19.2.3",
+"react-dom": "^19.2.3",
"react-force-graph-2d": "^1.27.1",
"react-markdown": "^10.1.0",
"uuid": "^9.0.1"
},
"devDependencies": {
@@ -55,7 +55,7 @@ export default function CogneeAddWidget({ onData, useCloud = false }: CogneeAddW
setTrue: setProcessingFilesInProgress,
setFalse: setProcessingFilesDone,
} = useBoolean(false);

const handleAddFiles = (dataset: Dataset, event: ChangeEvent<HTMLInputElement>) => {
event.stopPropagation();
@@ -111,7 +111,7 @@ export default function GraphControls({ data, isAddNodeFormOpen, onGraphShapeCha

const [isAuthShapeChangeEnabled, setIsAuthShapeChangeEnabled] = useState(true);
const shapeChangeTimeout = useRef<number | null>(null);

useEffect(() => {
onGraphShapeChange(DEFAULT_GRAPH_SHAPE);
@@ -57,7 +57,7 @@ export default function GraphVisualization({ ref, data, graphControls, className
// Initial size calculation
handleResize();

// ResizeObserver
const resizeObserver = new ResizeObserver(() => {
handleResize();
});

@@ -216,7 +216,7 @@ export default function GraphVisualization({ ref, data, graphControls, className
}, [data, graphRef]);

const [graphShape, setGraphShape] = useState<string>();

const zoomToFit: ForceGraphMethods["zoomToFit"] = (
durationMs?: number,
padding?: number,

@@ -227,15 +227,15 @@ export default function GraphVisualization({ ref, data, graphControls, className
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return undefined as any;
}

return graphRef.current.zoomToFit?.(durationMs, padding, nodeFilter);
};

useImperativeHandle(ref, () => ({
zoomToFit,
setGraphShape,
}));

return (
<div ref={containerRef} className={classNames("w-full h-full", className)} id="graph-container">
|
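The GraphVisualization changes above expose zoomToFit and setGraphShape to parent components through useImperativeHandle. As a rough sketch of how a consumer might drive that handle (the GraphPanel wrapper, its loose prop types, and the 500 ms / 40 px zoom values are illustrative assumptions, not code from this change set):

```tsx
// Hypothetical consumer of the GraphVisualizationAPI handle; only the import
// path and the zoomToFit(durationMs, padding) shape come from the diff above.
import { useRef } from "react";
import GraphVisualization, { GraphVisualizationAPI } from "@/app/(graph)/GraphVisualization";

// eslint-disable-next-line @typescript-eslint/no-explicit-any
export function GraphPanel({ data, graphControls }: { data: any; graphControls: any }) {
  const graphRef = useRef<GraphVisualizationAPI>(null);

  const handleFit = () => {
    // zoomToFit safely returns undefined until the underlying force graph has mounted.
    graphRef.current?.zoomToFit(500, 40);
  };

  return (
    <div className="w-full h-full">
      <button onClick={handleFit}>Fit graph</button>
      <GraphVisualization ref={graphRef} data={data} graphControls={graphControls} className="min-h-80" />
    </div>
  );
}
```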
@ -1373,4 +1373,4 @@
|
|||
"padding": 20
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ import AddDataToCognee from "./AddDataToCognee";
|
|||
import NotebooksAccordion from "./NotebooksAccordion";
|
||||
import CogneeInstancesAccordion from "./CogneeInstancesAccordion";
|
||||
import InstanceDatasetsAccordion from "./InstanceDatasetsAccordion";
|
||||
import cloudFetch from "@/modules/instances/cloudFetch";
|
||||
import localFetch from "@/modules/instances/localFetch";
|
||||
|
||||
interface DashboardProps {
|
||||
user?: {
|
||||
|
|
@ -26,6 +28,17 @@ interface DashboardProps {
|
|||
accessToken: string;
|
||||
}
|
||||
|
||||
const cogneeInstances = {
|
||||
cloudCognee: {
|
||||
name: "CloudCognee",
|
||||
fetch: cloudFetch,
|
||||
},
|
||||
localCognee: {
|
||||
name: "LocalCognee",
|
||||
fetch: localFetch,
|
||||
}
|
||||
};
|
||||
|
||||
export default function Dashboard({ accessToken }: DashboardProps) {
|
||||
fetch.setAccessToken(accessToken);
|
||||
const { user } = useAuthenticatedUser();
|
||||
|
|
@ -38,7 +51,7 @@ export default function Dashboard({ accessToken }: DashboardProps) {
|
|||
updateNotebook,
|
||||
saveNotebook,
|
||||
removeNotebook,
|
||||
} = useNotebooks();
|
||||
} = useNotebooks(cogneeInstances.localCognee);
|
||||
|
||||
useEffect(() => {
|
||||
if (!notebooks.length) {
|
||||
|
|
|
|||
|
|
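The Dashboard now builds a small cogneeInstances map and passes one entry into useNotebooks, so every notebook request goes through that instance's fetch. A hedged sketch of switching instances at runtime follows; the useCloud toggle, the panel component, and the import paths are assumptions, while the map shape and the useNotebooks(instance) signature are taken from the diff.

```tsx
import { useState } from "react";
// Import paths are assumed to match the module layout shown in this change set.
import useNotebooks from "@/modules/notebooks/useNotebooks";
import cloudFetch from "@/modules/instances/cloudFetch";
import localFetch from "@/modules/instances/localFetch";

const cogneeInstances = {
  cloudCognee: { name: "CloudCognee", fetch: cloudFetch },
  localCognee: { name: "LocalCognee", fetch: localFetch },
};

export function NotebooksPanel() {
  const [useCloud, setUseCloud] = useState(false);

  // The hook rebuilds its CRUD callbacks whenever the instance changes, because
  // `instance` is listed in its useCallback dependency arrays.
  const { notebooks, addNotebook } = useNotebooks(
    useCloud ? cogneeInstances.cloudCognee : cogneeInstances.localCognee
  );

  return (
    <div>
      <button onClick={() => setUseCloud((value) => !value)}>Switch instance</button>
      <button onClick={() => addNotebook("Untitled notebook")}>New notebook</button>
      <ul>
        {notebooks.map((notebook) => <li key={notebook.id}>{notebook.name}</li>)}
      </ul>
    </div>
  );
}
```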
@ -134,7 +134,7 @@ export default function DatasetsAccordion({
|
|||
} = useBoolean(false);
|
||||
|
||||
const [datasetToRemove, setDatasetToRemove] = useState<Dataset | null>(null);
|
||||
|
||||
|
||||
const handleDatasetRemove = (dataset: Dataset) => {
|
||||
setDatasetToRemove(dataset);
|
||||
openRemoveDatasetModal();
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { useCallback, useEffect } from "react";
|
|||
|
||||
import { fetch, isCloudEnvironment, useBoolean } from "@/utils";
|
||||
import { checkCloudConnection } from "@/modules/cloud";
|
||||
import { setApiKey } from "@/modules/instances/cloudFetch";
|
||||
import { CaretIcon, CloseIcon, CloudIcon, LocalCogneeIcon } from "@/ui/Icons";
|
||||
import { CTAButton, GhostButton, IconButton, Input, Modal } from "@/ui/elements";
|
||||
|
||||
|
|
@ -24,6 +25,7 @@ export default function InstanceDatasetsAccordion({ onDatasetsChange }: Instance
|
|||
const checkConnectionToCloudCognee = useCallback((apiKey?: string) => {
|
||||
if (apiKey) {
|
||||
fetch.setApiKey(apiKey);
|
||||
setApiKey(apiKey);
|
||||
}
|
||||
return checkCloudConnection()
|
||||
.then(setCloudCogneeConnected)
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ export default function Plan() {
|
|||
<div className="bg-white rounded-xl px-5 py-5 mb-2">
|
||||
Affordable and transparent pricing
|
||||
</div>
|
||||
|
||||
|
||||
<div className="grid grid-cols-3 gap-x-2.5">
|
||||
<div className="pt-13 py-4 px-5 mb-2.5 rounded-tl-xl rounded-tr-xl bg-white h-full">
|
||||
<div>Basic</div>
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ export default function useChat(dataset: Dataset) {
|
|||
setTrue: disableSearchRun,
|
||||
setFalse: enableSearchRun,
|
||||
} = useBoolean(false);
|
||||
|
||||
|
||||
const refreshChat = useCallback(async () => {
|
||||
const data = await fetchMessages();
|
||||
return setMessages(data);
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ function useDatasets(useCloud = false) {
|
|||
// checkDatasetStatuses(datasets);
|
||||
// }, 50000);
|
||||
// }, [fetchDatasetStatuses]);
|
||||
|
||||
|
||||
// useEffect(() => {
|
||||
// return () => {
|
||||
// if (statusTimeout.current !== null) {
|
||||
|
|
@ -95,6 +95,7 @@ function useDatasets(useCloud = false) {
|
|||
})
|
||||
.catch((error) => {
|
||||
console.error('Error fetching datasets:', error);
|
||||
throw error;
|
||||
});
|
||||
}, [useCloud]);
|
||||
|
||||
|
|
|
|||
59
cognee-frontend/src/modules/instances/cloudFetch.ts
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
import handleServerErrors from "@/utils/handleServerErrors";
|
||||
|
||||
// let numberOfRetries = 0;
|
||||
|
||||
const cloudApiUrl = process.env.NEXT_PUBLIC_CLOUD_API_URL || "http://localhost:8001";
|
||||
|
||||
let apiKey: string | null = process.env.NEXT_PUBLIC_COGWIT_API_KEY || null;
|
||||
|
||||
export function setApiKey(newApiKey: string) {
|
||||
apiKey = newApiKey;
|
||||
};
|
||||
|
||||
export default async function cloudFetch(url: URL | RequestInfo, options: RequestInit = {}): Promise<Response> {
|
||||
// function retry(lastError: Response) {
|
||||
// if (numberOfRetries >= 1) {
|
||||
// return Promise.reject(lastError);
|
||||
// }
|
||||
|
||||
// numberOfRetries += 1;
|
||||
|
||||
// return global.fetch("/auth/token")
|
||||
// .then(() => {
|
||||
// return fetch(url, options);
|
||||
// });
|
||||
// }
|
||||
|
||||
const authHeaders = {
|
||||
"Authorization": `X-Api-Key ${apiKey}`,
|
||||
};
|
||||
|
||||
return global.fetch(
|
||||
cloudApiUrl + "/api" + (typeof url === "string" ? url : url.toString()).replace("/v1", ""),
|
||||
{
|
||||
...options,
|
||||
headers: {
|
||||
...options.headers,
|
||||
...authHeaders,
|
||||
} as HeadersInit,
|
||||
credentials: "include",
|
||||
},
|
||||
)
|
||||
.then((response) => handleServerErrors(response, null, true))
|
||||
.catch((error) => {
|
||||
if (error.message === "NEXT_REDIRECT") {
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (error.detail === undefined) {
|
||||
return Promise.reject(
|
||||
new Error("No connection to the server.")
|
||||
);
|
||||
}
|
||||
|
||||
return Promise.reject(error);
|
||||
});
|
||||
// .finally(() => {
|
||||
// numberOfRetries = 0;
|
||||
// });
|
||||
}
|
||||
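As a minimal usage sketch of the new cloudFetch module (not part of the diff): the "/v1/datasets" endpoint is only an assumed example, while the behavior mirrors the file above — cloudFetch strips the "/v1" segment, prefixes NEXT_PUBLIC_CLOUD_API_URL plus "/api", and attaches the X-Api-Key authorization header set via setApiKey.

```typescript
import cloudFetch, { setApiKey } from "@/modules/instances/cloudFetch";

export async function listCloudDatasets(apiKey: string) {
  // Subsequent requests carry "Authorization: X-Api-Key <apiKey>".
  setApiKey(apiKey);

  // "/v1/datasets" is an illustrative path; the request actually goes to
  // `${NEXT_PUBLIC_CLOUD_API_URL}/api/datasets` after the "/v1" prefix is removed.
  const response = await cloudFetch("/v1/datasets", { method: "GET" });
  return response.json();
}
```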
27
cognee-frontend/src/modules/instances/localFetch.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
import handleServerErrors from "@/utils/handleServerErrors";
|
||||
|
||||
const localApiUrl = process.env.NEXT_PUBLIC_LOCAL_API_URL || "http://localhost:8000";
|
||||
|
||||
export default async function localFetch(url: URL | RequestInfo, options: RequestInit = {}): Promise<Response> {
|
||||
return global.fetch(
|
||||
localApiUrl + "/api" + (typeof url === "string" ? url : url.toString()),
|
||||
{
|
||||
...options,
|
||||
credentials: "include",
|
||||
},
|
||||
)
|
||||
.then((response) => handleServerErrors(response, null, false))
|
||||
.catch((error) => {
|
||||
if (error.message === "NEXT_REDIRECT") {
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (error.detail === undefined) {
|
||||
return Promise.reject(
|
||||
new Error("No connection to the server.")
|
||||
);
|
||||
}
|
||||
|
||||
return Promise.reject(error);
|
||||
});
|
||||
}
|
||||
4
cognee-frontend/src/modules/instances/types.ts
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
export interface CogneeInstance {
|
||||
name: string;
|
||||
fetch: typeof global.fetch;
|
||||
}
|
||||
13
cognee-frontend/src/modules/notebooks/createNotebook.ts
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function createNotebook(notebookName: string, instance: CogneeInstance) {
|
||||
return instance.fetch("/v1/notebooks/", {
|
||||
body: JSON.stringify({ name: notebookName }),
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}).then((response: Response) =>
|
||||
response.ok ? response.json() : Promise.reject(response)
|
||||
);
|
||||
}
|
||||
7
cognee-frontend/src/modules/notebooks/deleteNotebook.ts
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function deleteNotebook(notebookId: string, instance: CogneeInstance) {
|
||||
return instance.fetch(`/v1/notebooks/${notebookId}`, {
|
||||
method: "DELETE",
|
||||
});
|
||||
}
|
||||
12
cognee-frontend/src/modules/notebooks/getNotebooks.ts
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function getNotebooks(instance: CogneeInstance) {
|
||||
return instance.fetch("/v1/notebooks/", {
|
||||
method: "GET",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}).then((response: Response) =>
|
||||
response.ok ? response.json() : Promise.reject(response)
|
||||
);
|
||||
}
|
||||
14
cognee-frontend/src/modules/notebooks/runNotebookCell.ts
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
import { Cell } from "@/ui/elements/Notebook/types";
|
||||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function runNotebookCell(notebookId: string, cell: Cell, instance: CogneeInstance) {
|
||||
return instance.fetch(`/v1/notebooks/${notebookId}/${cell.id}/run`, {
|
||||
body: JSON.stringify({
|
||||
content: cell.content,
|
||||
}),
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}).then((response: Response) => response.json());
|
||||
}
|
||||
13
cognee-frontend/src/modules/notebooks/saveNotebook.ts
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function saveNotebook(notebookId: string, notebookData: object, instance: CogneeInstance) {
|
||||
return instance.fetch(`/v1/notebooks/${notebookId}`, {
|
||||
body: JSON.stringify(notebookData),
|
||||
method: "PUT",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}).then((response: Response) =>
|
||||
response.ok ? response.json() : Promise.reject(response)
|
||||
);
|
||||
}
|
||||
|
|
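The five new notebook helpers all take a CogneeInstance as their last argument, so the same code path can target either backend. A hedged composition example follows; the notebook name, cell payload, and helper import paths are illustrative, while the request shapes mirror the files above.

```typescript
import { CogneeInstance } from "@/modules/instances/types";
import localFetch from "@/modules/instances/localFetch";
import createNotebook from "@/modules/notebooks/createNotebook";
import saveNotebook from "@/modules/notebooks/saveNotebook";

const localInstance: CogneeInstance = { name: "LocalCognee", fetch: localFetch };

export async function bootstrapNotebook() {
  // POST /v1/notebooks/ on the chosen instance.
  const notebook = await createNotebook("Getting started", localInstance);

  // PUT /v1/notebooks/{id} with the updated name and cells.
  return saveNotebook(
    notebook.id,
    {
      name: notebook.name,
      cells: [{ id: crypto.randomUUID(), name: "Code Cell", type: "code", content: "" }],
    },
    localInstance,
  );
}
```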
@ -1,20 +1,18 @@
|
|||
import { useCallback, useState } from "react";
|
||||
import { fetch, isCloudEnvironment } from "@/utils";
|
||||
import { Cell, Notebook } from "@/ui/elements/Notebook/types";
|
||||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
import createNotebook from "./createNotebook";
|
||||
import deleteNotebook from "./deleteNotebook";
|
||||
import getNotebooks from "./getNotebooks";
|
||||
import runNotebookCell from "./runNotebookCell";
|
||||
import { default as persistNotebook } from "./saveNotebook";
|
||||
|
||||
function useNotebooks() {
|
||||
function useNotebooks(instance: CogneeInstance) {
|
||||
const [notebooks, setNotebooks] = useState<Notebook[]>([]);
|
||||
|
||||
const addNotebook = useCallback((notebookName: string) => {
|
||||
return fetch("/v1/notebooks", {
|
||||
body: JSON.stringify({ name: notebookName }),
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}, isCloudEnvironment())
|
||||
.then((response) => response.json())
|
||||
.then((notebook) => {
|
||||
return createNotebook(notebookName, instance)
|
||||
.then((notebook: Notebook) => {
|
||||
setNotebooks((notebooks) => [
|
||||
...notebooks,
|
||||
notebook,
|
||||
|
|
@ -22,36 +20,29 @@ function useNotebooks() {
|
|||
|
||||
return notebook;
|
||||
});
|
||||
}, []);
|
||||
}, [instance]);
|
||||
|
||||
const removeNotebook = useCallback((notebookId: string) => {
|
||||
return fetch(`/v1/notebooks/${notebookId}`, {
|
||||
method: "DELETE",
|
||||
}, isCloudEnvironment())
|
||||
return deleteNotebook(notebookId, instance)
|
||||
.then(() => {
|
||||
setNotebooks((notebooks) =>
|
||||
notebooks.filter((notebook) => notebook.id !== notebookId)
|
||||
);
|
||||
});
|
||||
}, []);
|
||||
}, [instance]);
|
||||
|
||||
const fetchNotebooks = useCallback(() => {
|
||||
return fetch("/v1/notebooks", {
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}, isCloudEnvironment())
|
||||
.then((response) => response.json())
|
||||
return getNotebooks(instance)
|
||||
.then((notebooks) => {
|
||||
setNotebooks(notebooks);
|
||||
|
||||
return notebooks;
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error("Error fetching notebooks:", error);
|
||||
console.error("Error fetching notebooks:", error.detail);
|
||||
throw error
|
||||
});
|
||||
}, []);
|
||||
}, [instance]);
|
||||
|
||||
const updateNotebook = useCallback((updatedNotebook: Notebook) => {
|
||||
setNotebooks((existingNotebooks) =>
|
||||
|
|
@ -64,20 +55,13 @@ function useNotebooks() {
|
|||
}, []);
|
||||
|
||||
const saveNotebook = useCallback((notebook: Notebook) => {
|
||||
return fetch(`/v1/notebooks/${notebook.id}`, {
|
||||
body: JSON.stringify({
|
||||
name: notebook.name,
|
||||
cells: notebook.cells,
|
||||
}),
|
||||
method: "PUT",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}, isCloudEnvironment())
|
||||
.then((response) => response.json())
|
||||
}, []);
|
||||
return persistNotebook(notebook.id, {
|
||||
name: notebook.name,
|
||||
cells: notebook.cells,
|
||||
}, instance);
|
||||
}, [instance]);
|
||||
|
||||
const runCell = useCallback((notebook: Notebook, cell: Cell, cogneeInstance: string) => {
|
||||
const runCell = useCallback((notebook: Notebook, cell: Cell) => {
|
||||
setNotebooks((existingNotebooks) =>
|
||||
existingNotebooks.map((existingNotebook) =>
|
||||
existingNotebook.id === notebook.id ? {
|
||||
|
|
@ -89,20 +73,11 @@ function useNotebooks() {
|
|||
error: undefined,
|
||||
} : existingCell
|
||||
),
|
||||
} : notebook
|
||||
} : existingNotebook
|
||||
)
|
||||
);
|
||||
|
||||
return fetch(`/v1/notebooks/${notebook.id}/${cell.id}/run`, {
|
||||
body: JSON.stringify({
|
||||
content: cell.content,
|
||||
}),
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}, cogneeInstance === "cloud")
|
||||
.then((response) => response.json())
|
||||
return runNotebookCell(notebook.id, cell, instance)
|
||||
.then((response) => {
|
||||
setNotebooks((existingNotebooks) =>
|
||||
existingNotebooks.map((existingNotebook) =>
|
||||
|
|
@ -115,11 +90,11 @@ function useNotebooks() {
|
|||
error: response.error,
|
||||
} : existingCell
|
||||
),
|
||||
} : notebook
|
||||
} : existingNotebook
|
||||
)
|
||||
);
|
||||
});
|
||||
}, []);
|
||||
}, [instance]);
|
||||
|
||||
return {
|
||||
notebooks,
|
||||
|
|
|
|||
|
|
@ -7,4 +7,4 @@ export default function GitHubIcon({ width = 24, height = 24, color = 'currentCo
|
|||
</g>
|
||||
</svg>
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ export default function Header({ user }: HeaderProps) {
|
|||
|
||||
checkMCPConnection();
|
||||
const interval = setInterval(checkMCPConnection, 30000);
|
||||
|
||||
|
||||
return () => clearInterval(interval);
|
||||
}, [setMCPConnected, setMCPDisconnected]);
|
||||
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ export default function SearchView() {
|
|||
scrollToBottom();
|
||||
|
||||
setSearchInputValue("");
|
||||
|
||||
|
||||
// Pass topK to sendMessage
|
||||
sendMessage(chatInput, searchType, topK)
|
||||
.then(scrollToBottom)
|
||||
|
|
@ -171,4 +171,4 @@ export default function SearchView() {
|
|||
</form>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,2 @@
|
|||
export { default as Modal } from "./Modal";
|
||||
export { default as useModal } from "./useModal";
|
||||
|
||||
|
|
|
|||
76
cognee-frontend/src/ui/elements/Notebook/MarkdownPreview.tsx
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
import { memo } from "react";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
|
||||
interface MarkdownPreviewProps {
|
||||
content: string;
|
||||
className?: string;
|
||||
}
|
||||
|
||||
function MarkdownPreview({ content, className = "" }: MarkdownPreviewProps) {
|
||||
return (
|
||||
<div className={`min-h-24 max-h-96 overflow-y-auto p-4 prose prose-sm max-w-none ${className}`}>
|
||||
<ReactMarkdown
|
||||
components={{
|
||||
h1: ({ children }) => <h1 className="text-2xl font-bold mt-4 mb-2">{children}</h1>,
|
||||
h2: ({ children }) => <h2 className="text-xl font-bold mt-3 mb-2">{children}</h2>,
|
||||
h3: ({ children }) => <h3 className="text-lg font-bold mt-3 mb-2">{children}</h3>,
|
||||
h4: ({ children }) => <h4 className="text-base font-bold mt-2 mb-1">{children}</h4>,
|
||||
h5: ({ children }) => <h5 className="text-sm font-bold mt-2 mb-1">{children}</h5>,
|
||||
h6: ({ children }) => <h6 className="text-xs font-bold mt-2 mb-1">{children}</h6>,
|
||||
p: ({ children }) => <p className="mb-2">{children}</p>,
|
||||
ul: ({ children }) => <ul className="list-disc list-inside mb-2 ml-4">{children}</ul>,
|
||||
ol: ({ children }) => <ol className="list-decimal list-inside mb-2 ml-4">{children}</ol>,
|
||||
li: ({ children }) => <li className="mb-1">{children}</li>,
|
||||
blockquote: ({ children }) => (
|
||||
<blockquote className="border-l-4 border-gray-300 pl-4 italic my-2">{children}</blockquote>
|
||||
),
|
||||
code: ({ className, children, ...props }) => {
|
||||
const isInline = !className;
|
||||
return isInline ? (
|
||||
<code className="bg-gray-100 px-1 py-0.5 rounded text-sm font-mono" {...props}>
|
||||
{children}
|
||||
</code>
|
||||
) : (
|
||||
<code className="block bg-gray-100 p-2 rounded text-sm font-mono overflow-x-auto" {...props}>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
},
|
||||
pre: ({ children }) => (
|
||||
<pre className="bg-gray-100 p-2 rounded text-sm font-mono overflow-x-auto mb-2">
|
||||
{children}
|
||||
</pre>
|
||||
),
|
||||
a: ({ href, children }) => (
|
||||
<a href={href} className="text-blue-600 hover:underline" target="_blank" rel="noopener noreferrer">
|
||||
{children}
|
||||
</a>
|
||||
),
|
||||
strong: ({ children }) => <strong className="font-bold">{children}</strong>,
|
||||
em: ({ children }) => <em className="italic">{children}</em>,
|
||||
hr: () => <hr className="my-4 border-gray-300" />,
|
||||
table: ({ children }) => (
|
||||
<div className="overflow-x-auto my-2">
|
||||
<table className="min-w-full border border-gray-300">{children}</table>
|
||||
</div>
|
||||
),
|
||||
thead: ({ children }) => <thead className="bg-gray-100">{children}</thead>,
|
||||
tbody: ({ children }) => <tbody>{children}</tbody>,
|
||||
tr: ({ children }) => <tr className="border-b border-gray-300">{children}</tr>,
|
||||
th: ({ children }) => (
|
||||
<th className="border border-gray-300 px-4 py-2 text-left font-bold">
|
||||
{children}
|
||||
</th>
|
||||
),
|
||||
td: ({ children }) => (
|
||||
<td className="border border-gray-300 px-4 py-2">{children}</td>
|
||||
),
|
||||
}}
|
||||
>
|
||||
{content}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default memo(MarkdownPreview);
|
||||
|
|
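A short, assumed usage example of the new MarkdownPreview component, rendering a markdown cell read-only the same way the Notebook does:

```tsx
import MarkdownPreview from "@/ui/elements/Notebook/MarkdownPreview";

export function ReadOnlyMarkdownCell({ content }: { content: string }) {
  // className is optional; here it only matches the light background used in the Notebook.
  return <MarkdownPreview content={content} className="!bg-gray-50" />;
}
```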
@ -2,15 +2,17 @@
|
|||
|
||||
import { v4 as uuid4 } from "uuid";
|
||||
import classNames from "classnames";
|
||||
import { Fragment, MouseEvent, RefObject, useCallback, useEffect, useRef, useState } from "react";
|
||||
import { Fragment, MouseEvent, MutableRefObject, useCallback, useEffect, useRef, useState, memo } from "react";
|
||||
|
||||
import { useModal } from "@/ui/elements/Modal";
|
||||
import { CaretIcon, CloseIcon, PlusIcon } from "@/ui/Icons";
|
||||
import { IconButton, PopupMenu, TextArea, Modal, GhostButton, CTAButton } from "@/ui/elements";
|
||||
import PopupMenu from "@/ui/elements/PopupMenu";
|
||||
import { IconButton, TextArea, Modal, GhostButton, CTAButton } from "@/ui/elements";
|
||||
import { GraphControlsAPI } from "@/app/(graph)/GraphControls";
|
||||
import GraphVisualization, { GraphVisualizationAPI } from "@/app/(graph)/GraphVisualization";
|
||||
|
||||
import NotebookCellHeader from "./NotebookCellHeader";
|
||||
import MarkdownPreview from "./MarkdownPreview";
|
||||
import { Cell, Notebook as NotebookType } from "./types";
|
||||
|
||||
interface NotebookProps {
|
||||
|
|
@ -19,7 +21,186 @@ interface NotebookProps {
|
|||
updateNotebook: (updatedNotebook: NotebookType) => void;
|
||||
}
|
||||
|
||||
interface NotebookCellProps {
|
||||
cell: Cell;
|
||||
index: number;
|
||||
isOpen: boolean;
|
||||
isMarkdownEditMode: boolean;
|
||||
onToggleOpen: () => void;
|
||||
onToggleMarkdownEdit: () => void;
|
||||
onContentChange: (value: string) => void;
|
||||
onCellRun: (cell: Cell, cogneeInstance: string) => Promise<void>;
|
||||
onCellRename: (cell: Cell) => void;
|
||||
onCellRemove: (cell: Cell) => void;
|
||||
onCellUp: (cell: Cell) => void;
|
||||
onCellDown: (cell: Cell) => void;
|
||||
onCellAdd: (afterCellIndex: number, cellType: "markdown" | "code") => void;
|
||||
}
|
||||
|
||||
const NotebookCell = memo(function NotebookCell({
|
||||
cell,
|
||||
index,
|
||||
isOpen,
|
||||
isMarkdownEditMode,
|
||||
onToggleOpen,
|
||||
onToggleMarkdownEdit,
|
||||
onContentChange,
|
||||
onCellRun,
|
||||
onCellRename,
|
||||
onCellRemove,
|
||||
onCellUp,
|
||||
onCellDown,
|
||||
onCellAdd,
|
||||
}: NotebookCellProps) {
|
||||
return (
|
||||
<Fragment>
|
||||
<div className="flex flex-row rounded-xl border-1 border-gray-100">
|
||||
<div className="flex flex-col flex-1 relative">
|
||||
{cell.type === "code" ? (
|
||||
<>
|
||||
<div className="absolute left-[-1.35rem] top-2.5">
|
||||
<IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={onToggleOpen}>
|
||||
<CaretIcon className={classNames("transition-transform", isOpen ? "rotate-0" : "rotate-180")} />
|
||||
</IconButton>
|
||||
</div>
|
||||
|
||||
<NotebookCellHeader
|
||||
cell={cell}
|
||||
runCell={onCellRun}
|
||||
renameCell={onCellRename}
|
||||
removeCell={onCellRemove}
|
||||
moveCellUp={onCellUp}
|
||||
moveCellDown={onCellDown}
|
||||
className="rounded-tl-xl rounded-tr-xl"
|
||||
/>
|
||||
|
||||
{isOpen && (
|
||||
<>
|
||||
<TextArea
|
||||
value={cell.content}
|
||||
onChange={onContentChange}
|
||||
isAutoExpanding
|
||||
name="cellInput"
|
||||
placeholder="Type your code here..."
|
||||
className="resize-none min-h-36 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0"
|
||||
/>
|
||||
|
||||
<div className="flex flex-col bg-gray-100 overflow-x-auto max-w-full">
|
||||
{cell.result && (
|
||||
<div className="px-2 py-2">
|
||||
output: <CellResult content={cell.result} />
|
||||
</div>
|
||||
)}
|
||||
{!!cell.error?.length && (
|
||||
<div className="px-2 py-2">
|
||||
error: {cell.error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<div className="absolute left-[-1.35rem] top-2.5">
|
||||
<IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={onToggleOpen}>
|
||||
<CaretIcon className={classNames("transition-transform", isOpen ? "rotate-0" : "rotate-180")} />
|
||||
</IconButton>
|
||||
</div>
|
||||
|
||||
<NotebookCellHeader
|
||||
cell={cell}
|
||||
renameCell={onCellRename}
|
||||
removeCell={onCellRemove}
|
||||
moveCellUp={onCellUp}
|
||||
moveCellDown={onCellDown}
|
||||
className="rounded-tl-xl rounded-tr-xl"
|
||||
/>
|
||||
|
||||
{isOpen && (
|
||||
<div className="relative rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 overflow-hidden">
|
||||
<GhostButton
|
||||
onClick={onToggleMarkdownEdit}
|
||||
className="absolute top-2 right-2.5 text-xs leading-[1] !px-2 !py-1 !h-auto"
|
||||
>
|
||||
{isMarkdownEditMode ? "Preview" : "Edit"}
|
||||
</GhostButton>
|
||||
{isMarkdownEditMode ? (
|
||||
<TextArea
|
||||
value={cell.content}
|
||||
onChange={onContentChange}
|
||||
isAutoExpanding
|
||||
name="markdownInput"
|
||||
placeholder="Type your markdown here..."
|
||||
className="resize-none min-h-24 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0 !bg-gray-50"
|
||||
/>
|
||||
) : (
|
||||
<MarkdownPreview content={cell.content} className="!bg-gray-50" />
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="ml-[-1.35rem]">
|
||||
<PopupMenu
|
||||
openToRight={true}
|
||||
triggerElement={<PlusIcon />}
|
||||
triggerClassName="p-[0.25rem] m-[-0.25rem]"
|
||||
>
|
||||
<div className="flex flex-col gap-0.5">
|
||||
<button
|
||||
onClick={() => onCellAdd(index, "markdown")}
|
||||
className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
|
||||
>
|
||||
<span>text</span>
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
onClick={() => onCellAdd(index, "code")}
|
||||
className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
|
||||
>
|
||||
<span>code</span>
|
||||
</div>
|
||||
</PopupMenu>
|
||||
</div>
|
||||
</Fragment>
|
||||
);
|
||||
});
|
||||
|
||||
export default function Notebook({ notebook, updateNotebook, runCell }: NotebookProps) {
|
||||
const [openCells, setOpenCells] = useState(new Set(notebook.cells.map((c: Cell) => c.id)));
|
||||
const [markdownEditMode, setMarkdownEditMode] = useState<Set<string>>(new Set());
|
||||
|
||||
const toggleCellOpen = useCallback((id: string) => {
|
||||
setOpenCells((prev) => {
|
||||
const newState = new Set(prev);
|
||||
|
||||
if (newState.has(id)) {
|
||||
newState.delete(id)
|
||||
} else {
|
||||
newState.add(id);
|
||||
}
|
||||
|
||||
return newState;
|
||||
});
|
||||
}, []);
|
||||
|
||||
const toggleMarkdownEditMode = useCallback((id: string) => {
|
||||
setMarkdownEditMode((prev) => {
|
||||
const newState = new Set(prev);
|
||||
|
||||
if (newState.has(id)) {
|
||||
newState.delete(id);
|
||||
} else {
|
||||
newState.add(id);
|
||||
}
|
||||
|
||||
return newState;
|
||||
});
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (notebook.cells.length === 0) {
|
||||
const newCell: Cell = {
|
||||
|
|
@ -34,7 +215,7 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
});
|
||||
toggleCellOpen(newCell.id)
|
||||
}
|
||||
}, [notebook, updateNotebook]);
|
||||
}, [notebook, updateNotebook, toggleCellOpen]);
|
||||
|
||||
const handleCellRun = useCallback((cell: Cell, cogneeInstance: string) => {
|
||||
return runCell(notebook, cell, cogneeInstance);
|
||||
|
|
@ -43,7 +224,7 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
const handleCellAdd = useCallback((afterCellIndex: number, cellType: "markdown" | "code") => {
|
||||
const newCell: Cell = {
|
||||
id: uuid4(),
|
||||
name: "new cell",
|
||||
name: cellType === "markdown" ? "Markdown Cell" : "Code Cell",
|
||||
type: cellType,
|
||||
content: "",
|
||||
};
|
||||
|
|
@ -59,7 +240,7 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
|
||||
toggleCellOpen(newCell.id);
|
||||
updateNotebook(newNotebook);
|
||||
}, [notebook, updateNotebook]);
|
||||
}, [notebook, updateNotebook, toggleCellOpen]);
|
||||
|
||||
const removeCell = useCallback((cell: Cell, event?: MouseEvent) => {
|
||||
event?.preventDefault();
|
||||
|
|
@ -81,14 +262,12 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
openCellRemoveConfirmModal(cell);
|
||||
}, [openCellRemoveConfirmModal]);
|
||||
|
||||
const handleCellInputChange = useCallback((notebook: NotebookType, cell: Cell, value: string) => {
|
||||
const newCell = {...cell, content: value };
|
||||
|
||||
const handleCellInputChange = useCallback((cellId: string, value: string) => {
|
||||
updateNotebook({
|
||||
...notebook,
|
||||
cells: notebook.cells.map((cell: Cell) => (cell.id === newCell.id ? newCell : cell)),
|
||||
cells: notebook.cells.map((cell: Cell) => (cell.id === cellId ? {...cell, content: value} : cell)),
|
||||
});
|
||||
}, [updateNotebook]);
|
||||
}, [notebook, updateNotebook]);
|
||||
|
||||
const handleCellUp = useCallback((cell: Cell) => {
|
||||
const index = notebook.cells.indexOf(cell);
|
||||
|
|
@ -131,133 +310,28 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
}
|
||||
}, [notebook, updateNotebook]);
|
||||
|
||||
const [openCells, setOpenCells] = useState(new Set(notebook.cells.map((c: Cell) => c.id)));
|
||||
|
||||
const toggleCellOpen = (id: string) => {
|
||||
setOpenCells((prev) => {
|
||||
const newState = new Set(prev);
|
||||
|
||||
if (newState.has(id)) {
|
||||
newState.delete(id)
|
||||
} else {
|
||||
newState.add(id);
|
||||
}
|
||||
|
||||
return newState;
|
||||
});
|
||||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className="bg-white rounded-xl flex flex-col gap-0.5 px-7 py-5 flex-1">
|
||||
<div className="mb-5">{notebook.name}</div>
|
||||
|
||||
{notebook.cells.map((cell: Cell, index) => (
|
||||
<Fragment key={cell.id}>
|
||||
<div key={cell.id} className="flex flex-row rounded-xl border-1 border-gray-100">
|
||||
<div className="flex flex-col flex-1 relative">
|
||||
{cell.type === "code" ? (
|
||||
<>
|
||||
<div className="absolute left-[-1.35rem] top-2.5">
|
||||
<IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={toggleCellOpen.bind(null, cell.id)}>
|
||||
<CaretIcon className={classNames("transition-transform", openCells.has(cell.id) ? "rotate-0" : "rotate-180")} />
|
||||
</IconButton>
|
||||
</div>
|
||||
|
||||
<NotebookCellHeader
|
||||
cell={cell}
|
||||
runCell={handleCellRun}
|
||||
renameCell={handleCellRename}
|
||||
removeCell={handleCellRemove}
|
||||
moveCellUp={handleCellUp}
|
||||
moveCellDown={handleCellDown}
|
||||
className="rounded-tl-xl rounded-tr-xl"
|
||||
/>
|
||||
|
||||
{openCells.has(cell.id) && (
|
||||
<>
|
||||
<TextArea
|
||||
value={cell.content}
|
||||
onChange={handleCellInputChange.bind(null, notebook, cell)}
|
||||
// onKeyUp={handleCellRunOnEnter}
|
||||
isAutoExpanding
|
||||
name="cellInput"
|
||||
placeholder="Type your code here..."
|
||||
contentEditable={true}
|
||||
className="resize-none min-h-36 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0"
|
||||
/>
|
||||
|
||||
<div className="flex flex-col bg-gray-100 overflow-x-auto max-w-full">
|
||||
{cell.result && (
|
||||
<div className="px-2 py-2">
|
||||
output: <CellResult content={cell.result} />
|
||||
</div>
|
||||
)}
|
||||
{!!cell.error?.length && (
|
||||
<div className="px-2 py-2">
|
||||
error: {cell.error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<div className="absolute left-[-1.35rem] top-2.5">
|
||||
<IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={toggleCellOpen.bind(null, cell.id)}>
|
||||
<CaretIcon className={classNames("transition-transform", openCells.has(cell.id) ? "rotate-0" : "rotate-180")} />
|
||||
</IconButton>
|
||||
</div>
|
||||
|
||||
<NotebookCellHeader
|
||||
cell={cell}
|
||||
renameCell={handleCellRename}
|
||||
removeCell={handleCellRemove}
|
||||
moveCellUp={handleCellUp}
|
||||
moveCellDown={handleCellDown}
|
||||
className="rounded-tl-xl rounded-tr-xl"
|
||||
/>
|
||||
|
||||
{openCells.has(cell.id) && (
|
||||
<TextArea
|
||||
value={cell.content}
|
||||
onChange={handleCellInputChange.bind(null, notebook, cell)}
|
||||
// onKeyUp={handleCellRunOnEnter}
|
||||
isAutoExpanding
|
||||
name="cellInput"
|
||||
placeholder="Type your text here..."
|
||||
contentEditable={true}
|
||||
className="resize-none min-h-24 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0"
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="ml-[-1.35rem]">
|
||||
<PopupMenu
|
||||
openToRight={true}
|
||||
triggerElement={<PlusIcon />}
|
||||
triggerClassName="p-[0.25rem] m-[-0.25rem]"
|
||||
>
|
||||
<div className="flex flex-col gap-0.5">
|
||||
<button
|
||||
onClick={() => handleCellAdd(index, "markdown")}
|
||||
className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
|
||||
>
|
||||
<span>text</span>
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
onClick={() => handleCellAdd(index, "code")}
|
||||
className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
|
||||
>
|
||||
<span>code</span>
|
||||
</div>
|
||||
</PopupMenu>
|
||||
</div>
|
||||
</Fragment>
|
||||
<NotebookCell
|
||||
key={cell.id}
|
||||
cell={cell}
|
||||
index={index}
|
||||
isOpen={openCells.has(cell.id)}
|
||||
isMarkdownEditMode={markdownEditMode.has(cell.id)}
|
||||
onToggleOpen={() => toggleCellOpen(cell.id)}
|
||||
onToggleMarkdownEdit={() => toggleMarkdownEditMode(cell.id)}
|
||||
onContentChange={(value) => handleCellInputChange(cell.id, value)}
|
||||
onCellRun={handleCellRun}
|
||||
onCellRename={handleCellRename}
|
||||
onCellRemove={handleCellRemove}
|
||||
onCellUp={handleCellUp}
|
||||
onCellDown={handleCellDown}
|
||||
onCellAdd={handleCellAdd}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
|
||||
|
|
@ -288,6 +362,10 @@ function CellResult({ content }: { content: [] }) {
|
|||
getSelectedNode: () => null,
|
||||
});
|
||||
|
||||
if (content.length === 0) {
|
||||
return <span>OK</span>;
|
||||
}
|
||||
|
||||
for (const line of content) {
|
||||
try {
|
||||
if (Array.isArray(line)) {
|
||||
|
|
@ -298,7 +376,7 @@ function CellResult({ content }: { content: [] }) {
|
|||
<span className="text-sm pl-2 mb-4">reasoning graph</span>
|
||||
<GraphVisualization
|
||||
data={transformInsightsGraphData(line)}
|
||||
ref={graphRef as RefObject<GraphVisualizationAPI>}
|
||||
ref={graphRef as MutableRefObject<GraphVisualizationAPI>}
|
||||
graphControls={graphControls}
|
||||
className="min-h-80"
|
||||
/>
|
||||
|
|
@ -346,7 +424,7 @@ function CellResult({ content }: { content: [] }) {
|
|||
<span className="text-sm pl-2 mb-4">reasoning graph (datasets: {datasetName})</span>
|
||||
<GraphVisualization
|
||||
data={transformToVisualizationData(graph)}
|
||||
ref={graphRef as RefObject<GraphVisualizationAPI>}
|
||||
ref={graphRef as MutableRefObject<GraphVisualizationAPI>}
|
||||
graphControls={graphControls}
|
||||
className="min-h-80"
|
||||
/>
|
||||
|
|
@ -356,8 +434,7 @@ function CellResult({ content }: { content: [] }) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "string") {
|
||||
else if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "string") {
|
||||
const datasets = Array.from(
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
new Set(Object.values(line["datasets"]).map((dataset: any) => dataset.name))
|
||||
|
|
@ -369,39 +446,46 @@ function CellResult({ content }: { content: [] }) {
|
|||
<span className="block px-2 py-2 whitespace-normal">{line["result"]}</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
if (typeof(line) === "object" && line["graphs"]) {
|
||||
Object.entries<{ nodes: []; edges: []; }>(line["graphs"]).forEach(([datasetName, graph]) => {
|
||||
parsedContent.push(
|
||||
<div key={datasetName} className="w-full h-full bg-white">
|
||||
<span className="text-sm pl-2 mb-4">reasoning graph (datasets: {datasetName})</span>
|
||||
<GraphVisualization
|
||||
data={transformToVisualizationData(graph)}
|
||||
ref={graphRef as RefObject<GraphVisualizationAPI>}
|
||||
graphControls={graphControls}
|
||||
className="min-h-80"
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "object") {
|
||||
if (line["graphs"]) {
|
||||
Object.entries<{ nodes: []; edges: []; }>(line["graphs"]).forEach(([datasetName, graph]) => {
|
||||
parsedContent.push(
|
||||
<div key={datasetName} className="w-full h-full bg-white">
|
||||
<span className="text-sm pl-2 mb-4">reasoning graph (datasets: {datasetName})</span>
|
||||
<GraphVisualization
|
||||
data={transformToVisualizationData(graph)}
|
||||
ref={graphRef as MutableRefObject<GraphVisualizationAPI>}
|
||||
graphControls={graphControls}
|
||||
className="min-h-80"
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
else if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "object") {
|
||||
parsedContent.push(
|
||||
<pre className="px-2 w-full h-full bg-white text-sm" key={String(line).slice(0, -10)}>
|
||||
{JSON.stringify(line["result"], null, 2)}
|
||||
</pre>
|
||||
)
|
||||
}
|
||||
if (typeof(line) === "string") {
|
||||
else if (typeof(line) === "object") {
|
||||
parsedContent.push(
|
||||
<pre className="px-2 w-full h-full bg-white text-sm" key={String(line).slice(0, -10)}>
|
||||
{JSON.stringify(line, null, 2)}
|
||||
</pre>
|
||||
)
|
||||
}
|
||||
else if (typeof(line) === "string") {
|
||||
parsedContent.push(
|
||||
<pre className="px-2 w-full h-full bg-white text-sm whitespace-normal" key={String(line).slice(0, -10)}>
|
||||
{line}
|
||||
</pre>
|
||||
)
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
} catch {
|
||||
// It is fine if we can't parse the output line; we show it as-is.
|
||||
parsedContent.push(
|
||||
<pre className="px-2 w-full h-full bg-white text-sm whitespace-normal" key={String(line).slice(0, -10)}>
|
||||
{line}
|
||||
|
|
@ -415,7 +499,6 @@ function CellResult({ content }: { content: [] }) {
|
|||
{item}
|
||||
</div>
|
||||
));
|
||||
|
||||
};
|
||||
|
||||
function transformToVisualizationData(graph: { nodes: [], edges: [] }) {
|
||||
|
|
@ -451,7 +534,7 @@ function transformInsightsGraphData(triplets: Triplet[]) {
|
|||
target: string,
|
||||
label: string,
|
||||
}
|
||||
} = {};
|
||||
} = {};
|
||||
|
||||
for (const triplet of triplets) {
|
||||
nodes[triplet[0].id] = {
|
||||
|
|
@ -471,7 +554,7 @@ function transformInsightsGraphData(triplets: Triplet[]) {
|
|||
label: triplet[1]["relationship_name"],
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
nodes: Object.values(nodes),
|
||||
links: Object.values(links),
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import classNames from "classnames";
|
||||
|
||||
import { isCloudEnvironment, useBoolean } from "@/utils";
|
||||
import { PlayIcon } from "@/ui/Icons";
|
||||
import { PopupMenu, IconButton } from "@/ui/elements";
|
||||
import PopupMenu from "@/ui/elements/PopupMenu";
|
||||
import { IconButton } from "@/ui/elements";
|
||||
import { LoadingIndicator } from "@/ui/App";
|
||||
|
||||
import { Cell } from "./types";
|
||||
|
|
@ -39,7 +42,7 @@ export default function NotebookCellHeader({
|
|||
if (runCell) {
|
||||
setIsRunningCell();
|
||||
runCell(cell, runInstance)
|
||||
.then(() => {
|
||||
.finally(() => {
|
||||
setIsNotRunningCell();
|
||||
});
|
||||
}
|
||||
|
|
@ -53,7 +56,7 @@ export default function NotebookCellHeader({
|
|||
{isRunningCell ? <LoadingIndicator /> : <IconButton onClick={handleCellRun}><PlayIcon /></IconButton>}
|
||||
</>
|
||||
)}
|
||||
<span className="ml-4">{cell.name}</span>
|
||||
<span className="ml-4">{cell.type === "markdown" ? "Markdown Cell" : cell.name}</span>
|
||||
</div>
|
||||
<div className="pr-4 flex flex-row items-center gap-8">
|
||||
{runCell && (
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
"use client";
|
||||
|
||||
import classNames from "classnames";
|
||||
import { InputHTMLAttributes, useCallback, useEffect, useLayoutEffect, useRef } from "react"
|
||||
import { InputHTMLAttributes, useCallback, useEffect, useRef } from "react"
|
||||
|
||||
interface TextAreaProps extends Omit<InputHTMLAttributes<HTMLTextAreaElement>, "onChange"> {
|
||||
isAutoExpanding?: boolean; // Set to true to enable auto-expanding text area behavior. Default is false.
|
||||
value: string;
|
||||
onChange: (value: string) => void;
|
||||
value?: string;
|
||||
onChange?: (value: string) => void;
|
||||
}
|
||||
|
||||
export default function TextArea({
|
||||
|
|
@ -19,95 +19,81 @@ export default function TextArea({
|
|||
placeholder = "",
|
||||
onKeyUp,
|
||||
...props
|
||||
}: TextAreaProps) {
|
||||
const handleTextChange = useCallback((event: Event) => {
|
||||
const fakeTextAreaElement = event.target as HTMLDivElement;
|
||||
const newValue = fakeTextAreaElement.innerText;
|
||||
}: TextAreaProps) {
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||
const maxHeightRef = useRef<number | null>(null);
|
||||
const throttleTimeoutRef = useRef<number | null>(null);
|
||||
const lastAdjustTimeRef = useRef<number>(0);
|
||||
const THROTTLE_MS = 250; // 4 calculations per second
|
||||
|
||||
const adjustHeight = useCallback(() => {
|
||||
if (!isAutoExpanding || !textareaRef.current) return;
|
||||
|
||||
const textarea = textareaRef.current;
|
||||
|
||||
// Cache maxHeight on first calculation
|
||||
if (maxHeightRef.current === null) {
|
||||
const computedStyle = getComputedStyle(textarea);
|
||||
maxHeightRef.current = computedStyle.maxHeight === "none"
|
||||
? Infinity
|
||||
: parseInt(computedStyle.maxHeight) || Infinity;
|
||||
}
|
||||
|
||||
// Reset height to auto to get the correct scrollHeight
|
||||
textarea.style.height = "auto";
|
||||
// Set height to scrollHeight, but respect max-height
|
||||
const scrollHeight = textarea.scrollHeight;
|
||||
textarea.style.height = `${Math.min(scrollHeight, maxHeightRef.current)}px`;
|
||||
lastAdjustTimeRef.current = Date.now();
|
||||
}, [isAutoExpanding]);
|
||||
|
||||
const handleChange = useCallback((event: React.ChangeEvent<HTMLTextAreaElement>) => {
|
||||
const newValue = event.target.value;
|
||||
onChange?.(newValue);
|
||||
}, [onChange]);
|
||||
|
||||
const handleKeyUp = useCallback((event: Event) => {
|
||||
if (onKeyUp) {
|
||||
onKeyUp(event as unknown as React.KeyboardEvent<HTMLTextAreaElement>);
|
||||
}
|
||||
}, [onKeyUp]);
|
||||
// Throttle height adjustments to avoid blocking typing
|
||||
if (isAutoExpanding) {
|
||||
const now = Date.now();
|
||||
const timeSinceLastAdjust = now - lastAdjustTimeRef.current;
|
||||
|
||||
const handleTextAreaFocus = (event: React.FocusEvent<HTMLDivElement>) => {
|
||||
if (event.target.innerText.trim() === placeholder) {
|
||||
event.target.innerText = "";
|
||||
}
|
||||
};
|
||||
const handleTextAreaBlur = (event: React.FocusEvent<HTMLDivElement>) => {
|
||||
if (value === "") {
|
||||
event.target.innerText = placeholder;
|
||||
}
|
||||
};
|
||||
|
||||
const handleChange = (event: React.ChangeEvent<HTMLTextAreaElement>) => {
|
||||
onChange(event.target.value);
|
||||
};
|
||||
|
||||
const fakeTextAreaRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
const fakeTextAreaElement = fakeTextAreaRef.current;
|
||||
|
||||
if (fakeTextAreaElement && fakeTextAreaElement.innerText.trim() !== "") {
|
||||
fakeTextAreaElement.innerText = placeholder;
|
||||
}
|
||||
}, [placeholder]);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
const fakeTextAreaElement = fakeTextAreaRef.current;
|
||||
|
||||
if (fakeTextAreaElement) {
|
||||
fakeTextAreaElement.addEventListener("input", handleTextChange);
|
||||
fakeTextAreaElement.addEventListener("keyup", handleKeyUp);
|
||||
}
|
||||
|
||||
return () => {
|
||||
if (fakeTextAreaElement) {
|
||||
fakeTextAreaElement.removeEventListener("input", handleTextChange);
|
||||
fakeTextAreaElement.removeEventListener("keyup", handleKeyUp);
|
||||
if (timeSinceLastAdjust >= THROTTLE_MS) {
|
||||
adjustHeight();
|
||||
} else {
|
||||
if (throttleTimeoutRef.current !== null) {
|
||||
clearTimeout(throttleTimeoutRef.current);
|
||||
}
|
||||
throttleTimeoutRef.current = window.setTimeout(() => {
|
||||
adjustHeight();
|
||||
throttleTimeoutRef.current = null;
|
||||
}, THROTTLE_MS - timeSinceLastAdjust);
|
||||
}
|
||||
};
|
||||
}, [handleKeyUp, handleTextChange]);
|
||||
}
|
||||
}, [onChange, isAutoExpanding, adjustHeight]);
|
||||
|
||||
useEffect(() => {
|
||||
const fakeTextAreaElement = fakeTextAreaRef.current;
|
||||
const textAreaText = fakeTextAreaElement?.innerText;
|
||||
|
||||
if (fakeTextAreaElement && (value === "" || value === "\n")) {
|
||||
fakeTextAreaElement.innerText = placeholder;
|
||||
return;
|
||||
if (isAutoExpanding && textareaRef.current) {
|
||||
adjustHeight();
|
||||
}
|
||||
}, [value, isAutoExpanding, adjustHeight]);
|
||||
|
||||
if (fakeTextAreaElement && textAreaText !== value) {
|
||||
fakeTextAreaElement.innerText = value;
|
||||
}
|
||||
}, [placeholder, value]);
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (throttleTimeoutRef.current !== null) {
|
||||
clearTimeout(throttleTimeoutRef.current);
|
||||
}
|
||||
};
|
||||
}, []);
|
||||
|
||||
return isAutoExpanding ? (
|
||||
<>
|
||||
<div
|
||||
ref={fakeTextAreaRef}
|
||||
contentEditable="true"
|
||||
role="textbox"
|
||||
aria-multiline="true"
|
||||
className={classNames("block w-full rounded-md bg-white px-4 py-4 text-base text-gray-900 outline-1 -outline-offset-1 outline-gray-300 placeholder:text-gray-400 focus:outline-2 focus:-outline-offset-2 focus:outline-indigo-600", className)}
|
||||
onFocus={handleTextAreaFocus}
|
||||
onBlur={handleTextAreaBlur}
|
||||
/>
|
||||
</>
|
||||
) : (
|
||||
return (
|
||||
<textarea
|
||||
ref={isAutoExpanding ? textareaRef : undefined}
|
||||
name={name}
|
||||
style={style}
|
||||
value={value}
|
||||
placeholder={placeholder}
|
||||
className={classNames("block w-full rounded-md bg-white px-4 py-4 text-base text-gray-900 outline-1 -outline-offset-1 outline-gray-300 placeholder:text-gray-400 focus:outline-2 focus:-outline-offset-2 focus:outline-indigo-600", className)}
|
||||
onChange={handleChange}
|
||||
onKeyUp={onKeyUp}
|
||||
{...props}
|
||||
/>
|
||||
)
|
||||
|
|
|
|||
|
|
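For reference, a controlled auto-expanding usage of the reworked TextArea (the NoteInput wrapper is hypothetical; the props follow the interface above, with onChange receiving the plain string value and height recalculation throttled internally to roughly four adjustments per second):

```tsx
import { useState } from "react";
import { TextArea } from "@/ui/elements";

export function NoteInput() {
  const [note, setNote] = useState("");

  return (
    <TextArea
      isAutoExpanding
      name="noteInput"
      value={note}
      onChange={setNote}
      placeholder="Type your note here..."
      className="max-h-96"
    />
  );
}
```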
@ -10,4 +10,4 @@ export { default as NeutralButton } from "./NeutralButton";
|
|||
export { default as StatusIndicator } from "./StatusIndicator";
|
||||
export { default as StatusDot } from "./StatusDot";
|
||||
export { default as Accordion } from "./Accordion";
|
||||
export { default as Notebook } from "./Notebook";
|
||||
export { default as Notebook } from "./Notebook";
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ export default async function fetch(url: string, options: RequestInit = {}, useC
|
|||
new Error("Backend server is not responding. Please check if the server is running.")
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
if (error.detail === undefined) {
|
||||
return Promise.reject(
|
||||
new Error("No connection to the server.")
|
||||
|
|
@ -74,7 +74,7 @@ export default async function fetch(url: string, options: RequestInit = {}, useC
|
|||
fetch.checkHealth = async () => {
|
||||
const maxRetries = 5;
|
||||
const retryDelay = 1000; // 1 second
|
||||
|
||||
|
||||
for (let i = 0; i < maxRetries; i++) {
|
||||
try {
|
||||
const response = await global.fetch(`${backendApiUrl.replace("/api", "")}/health`);
|
||||
|
|
@ -90,7 +90,7 @@ fetch.checkHealth = async () => {
|
|||
await new Promise(resolve => setTimeout(resolve, retryDelay));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
throw new Error("Backend server is not responding after multiple attempts");
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,12 @@
|
|||
import { redirect } from "next/navigation";
|
||||
|
||||
export default function handleServerErrors(response: Response, retry?: (response: Response) => Promise<Response>, useCloud?: boolean): Promise<Response> {
|
||||
export default function handleServerErrors(
|
||||
response: Response,
|
||||
retry: ((response: Response) => Promise<Response>) | null = null,
|
||||
useCloud: boolean = false,
|
||||
): Promise<Response> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (response.status === 401 && !useCloud) {
|
||||
if ((response.status === 401 || response.status === 403) && !useCloud) {
|
||||
if (retry) {
|
||||
return retry(response)
|
||||
.catch(() => {
|
||||
|
|
|
|||
|
|
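A sketch of how a fetch wrapper might use the widened handleServerErrors signature (the authedFetch wrapper and the "/auth/token" refresh endpoint are assumptions; the argument order and defaults come from the change above):

```typescript
import handleServerErrors from "@/utils/handleServerErrors";

export async function authedFetch(url: string, options: RequestInit = {}): Promise<Response> {
  const response = await global.fetch(url, { ...options, credentials: "include" });

  return handleServerErrors(
    response,
    // Optional retry callback: attempt a session refresh once, then replay the request.
    () => global.fetch("/auth/token").then(() => global.fetch(url, options)),
    false, // useCloud: with false, 401/403 responses take the retry/redirect path
  );
}
```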
@ -105,14 +105,14 @@ If you'd rather run cognee-mcp in a container, you have two options:
|
|||
```bash
|
||||
# For HTTP transport (recommended for web deployments)
|
||||
docker run -e TRANSPORT_MODE=http --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main
|
||||
# For SSE transport
|
||||
# For SSE transport
|
||||
docker run -e TRANSPORT_MODE=sse --env-file ./.env -p 8000:8000 --rm -it cognee/cognee-mcp:main
|
||||
# For stdio transport (default)
|
||||
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
|
||||
```
|
||||
|
||||
|
||||
**Installing optional dependencies at runtime:**
|
||||
|
||||
|
||||
You can install optional dependencies when running the container by setting the `EXTRAS` environment variable:
|
||||
```bash
|
||||
# Install a single optional dependency group at runtime
|
||||
|
|
@ -122,7 +122,7 @@ If you'd rather run cognee-mcp in a container, you have two options:
|
|||
--env-file ./.env \
|
||||
-p 8000:8000 \
|
||||
--rm -it cognee/cognee-mcp:main
|
||||
|
||||
|
||||
# Install multiple optional dependency groups at runtime (comma-separated)
|
||||
docker run \
|
||||
-e TRANSPORT_MODE=sse \
|
||||
|
|
@ -131,7 +131,7 @@ If you'd rather run cognee-mcp in a container, you have two options:
|
|||
-p 8000:8000 \
|
||||
--rm -it cognee/cognee-mcp:main
|
||||
```
|
||||
|
||||
|
||||
**Available optional dependency groups:**
|
||||
- `aws` - S3 storage support
|
||||
- `postgres` / `postgres-binary` - PostgreSQL database support
|
||||
|
|
@ -160,7 +160,7 @@ If you'd rather run cognee-mcp in a container, you have two options:
|
|||
# With stdio transport (default)
|
||||
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
|
||||
```
|
||||
|
||||
|
||||
**With runtime installation of optional dependencies:**
|
||||
```bash
|
||||
# Install optional dependencies from Docker Hub image
|
||||
|
|
@ -357,7 +357,7 @@ You can configure both transports simultaneously for testing:
|
|||
"url": "http://localhost:8000/sse"
|
||||
},
|
||||
"cognee-http": {
|
||||
"type": "http",
|
||||
"type": "http",
|
||||
"url": "http://localhost:8000/mcp"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,11 +7,11 @@ echo "Environment: $ENVIRONMENT"
|
|||
# Install optional dependencies if EXTRAS is set
|
||||
if [ -n "$EXTRAS" ]; then
|
||||
echo "Installing optional dependencies: $EXTRAS"
|
||||
|
||||
|
||||
# Get the cognee version that's currently installed
|
||||
COGNEE_VERSION=$(uv pip show cognee | grep "Version:" | awk '{print $2}')
|
||||
echo "Current cognee version: $COGNEE_VERSION"
|
||||
|
||||
|
||||
# Build the extras list for cognee
|
||||
IFS=',' read -ra EXTRA_ARRAY <<< "$EXTRAS"
|
||||
# Combine base extras from pyproject.toml with requested extras
|
||||
|
|
@ -28,11 +28,11 @@ if [ -n "$EXTRAS" ]; then
|
|||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
echo "Installing cognee with extras: $ALL_EXTRAS"
|
||||
echo "Running: uv pip install 'cognee[$ALL_EXTRAS]==$COGNEE_VERSION'"
|
||||
uv pip install "cognee[$ALL_EXTRAS]==$COGNEE_VERSION"
|
||||
|
||||
|
||||
# Verify installation
|
||||
echo ""
|
||||
echo "✓ Optional dependencies installation completed"
|
||||
|
|
@ -93,19 +93,19 @@ if [ -n "$API_URL" ]; then
|
|||
if echo "$API_URL" | grep -q "localhost" || echo "$API_URL" | grep -q "127.0.0.1"; then
|
||||
echo "⚠️ Warning: API_URL contains localhost/127.0.0.1"
|
||||
echo " Original: $API_URL"
|
||||
|
||||
|
||||
# Try to use host.docker.internal (works on Mac/Windows and recent Linux with Docker Desktop)
|
||||
FIXED_API_URL=$(echo "$API_URL" | sed 's/localhost/host.docker.internal/g' | sed 's/127\.0\.0\.1/host.docker.internal/g')
|
||||
|
||||
|
||||
echo " Converted to: $FIXED_API_URL"
|
||||
echo " This will work on Mac/Windows/Docker Desktop."
|
||||
echo " On Linux without Docker Desktop, you may need to:"
|
||||
echo " - Use --network host, OR"
|
||||
echo " - Set API_URL=http://172.17.0.1:8000 (Docker bridge IP)"
|
||||
|
||||
|
||||
API_URL="$FIXED_API_URL"
|
||||
fi
|
||||
|
||||
|
||||
API_ARGS="--api-url $API_URL"
|
||||
if [ -n "$API_TOKEN" ]; then
|
||||
API_ARGS="$API_ARGS --api-token $API_TOKEN"
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ class CogneeClient:
|
|||
|
||||
with redirect_stdout(sys.stderr):
|
||||
results = await self.cognee.search(
|
||||
query_type=SearchType[query_type.upper()], query_text=query_text
|
||||
query_type=SearchType[query_type.upper()], query_text=query_text, top_k=top_k
|
||||
)
|
||||
return results
|
||||
|
||||
|
|
|
|||
|
|
@ -316,7 +316,7 @@ async def save_interaction(data: str) -> list:
|
|||
|
||||
|
||||
@mcp.tool()
|
||||
async def search(search_query: str, search_type: str) -> list:
|
||||
async def search(search_query: str, search_type: str, top_k: int = 10) -> list:
|
||||
"""
|
||||
Search and query the knowledge graph for insights, information, and connections.
|
||||
|
||||
|
|
@ -389,6 +389,13 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
|
||||
The search_type is case-insensitive and will be converted to uppercase.
|
||||
|
||||
top_k : int, optional
|
||||
Maximum number of results to return (default: 10).
|
||||
Controls the amount of context retrieved from the knowledge graph.
|
||||
- Lower values (3-5): Faster, more focused results
|
||||
- Higher values (10-20): More comprehensive, but slower and more context-heavy
|
||||
Helps manage response size and context window usage in MCP clients.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
|
|
@ -425,13 +432,32 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
|
||||
"""
|
||||
|
||||
async def search_task(search_query: str, search_type: str) -> str:
|
||||
"""Search the knowledge graph"""
|
||||
async def search_task(search_query: str, search_type: str, top_k: int) -> str:
|
||||
"""
|
||||
Internal task to execute knowledge graph search with result formatting.
|
||||
|
||||
Handles the actual search execution and formats results appropriately
|
||||
for MCP clients based on the search type and execution mode (API vs direct).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
search_query : str
|
||||
The search query in natural language
|
||||
search_type : str
|
||||
Type of search to perform (GRAPH_COMPLETION, CHUNKS, etc.)
|
||||
top_k : int
|
||||
Maximum number of results to return
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Formatted search results as a string, with format depending on search_type
|
||||
"""
|
||||
# NOTE: MCP uses stdout to communicate, we must redirect all output
|
||||
# going to stdout ( like the print function ) to stderr.
|
||||
with redirect_stdout(sys.stderr):
|
||||
search_results = await cognee_client.search(
|
||||
query_text=search_query, query_type=search_type
|
||||
query_text=search_query, query_type=search_type, top_k=top_k
|
||||
)
|
||||
|
||||
# Handle different result formats based on API vs direct mode
|
||||
|
|
@ -465,7 +491,7 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
else:
|
||||
return str(search_results)
|
||||
|
||||
search_results = await search_task(search_query, search_type)
|
||||
search_results = await search_task(search_query, search_type, top_k)
|
||||
return [types.TextContent(type="text", text=search_results)]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -16,4 +16,4 @@ EMBEDDING_API_VERSION=""
|
|||
|
||||
|
||||
GRAPHISTRY_USERNAME=""
|
||||
GRAPHISTRY_PASSWORD=""
|
||||
GRAPHISTRY_PASSWORD=""
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ This starter kit is deprecated. Its examples have been integrated into the `/new
|
|||
# Cognee Starter Kit
|
||||
Welcome to the <a href="https://github.com/topoteretes/cognee">cognee</a> Starter Repo! This repository is designed to help you get started quickly by providing a structured dataset and pre-built data pipelines using cognee to build powerful knowledge graphs.
|
||||
|
||||
You can use this repo to ingest, process, and visualize data in minutes.
|
||||
You can use this repo to ingest, process, and visualize data in minutes.
|
||||
|
||||
By following this guide, you will:
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ Custom model uses custom pydantic model for graph extraction. This script catego
|
|||
python src/pipelines/custom-model.py
|
||||
```
|
||||
|
||||
## Graph preview
|
||||
## Graph preview
|
||||
|
||||
cognee provides a visualize_graph function that will render the graph for you.
|
||||
|
||||
|
|
|
|||
|
|
@@ -252,7 +252,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
    chunk_size: int = None,
    config: Config = None,
    custom_prompt: Optional[str] = None,
    chunks_per_batch: int = 100,
    chunks_per_batch: int = None,
    **kwargs,
) -> list[Task]:
    if config is None:

@@ -272,12 +272,14 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
            "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
        }

    if chunks_per_batch is None:
        chunks_per_batch = 100

    cognify_config = get_cognify_config()
    embed_triplets = cognify_config.triplet_embedding

    if chunks_per_batch is None:
        chunks_per_batch = (
            cognify_config.chunks_per_batch if cognify_config.chunks_per_batch is not None else 100
        )

    default_tasks = [
        Task(classify_documents),
        Task(

@@ -308,7 +310,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's


async def get_temporal_tasks(
    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10
    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = None
) -> list[Task]:
    """
    Builds and returns a list of temporal processing tasks to be executed in sequence.

@@ -330,7 +332,10 @@ async def get_temporal_tasks(
        list[Task]: A list of Task objects representing the temporal processing pipeline.
    """
    if chunks_per_batch is None:
        chunks_per_batch = 10
    from cognee.modules.cognify.config import get_cognify_config

    configured = get_cognify_config().chunks_per_batch
    chunks_per_batch = configured if configured is not None else 10

    temporal_tasks = [
        Task(classify_documents),
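Taken together, the batch size now resolves through a simple precedence chain. A compact restatement of that logic, assuming an illustrative helper name rather than an actual function in the codebase:

```python
from typing import Optional

def resolve_chunks_per_batch(explicit: Optional[int], configured: Optional[int], default: int = 100) -> int:
    # An explicit argument (API payload or CLI flag) wins, then CognifyConfig.chunks_per_batch, then the default.
    if explicit is not None:
        return explicit
    return configured if configured is not None else default
```

`get_temporal_tasks` applies the same chain with a fallback of 10 instead of 100.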
@@ -46,6 +46,11 @@ class CognifyPayloadDTO(InDTO):
        examples=[[]],
        description="Reference to one or more previously uploaded ontologies",
    )
    chunks_per_batch: Optional[int] = Field(
        default=None,
        description="Number of chunks to process per task batch in Cognify (overrides default).",
        examples=[10, 20, 50, 100],
    )


def get_cognify_router() -> APIRouter:

@@ -146,6 +151,7 @@ def get_cognify_router() -> APIRouter:
            config=config_to_use,
            run_in_background=payload.run_in_background,
            custom_prompt=payload.custom_prompt,
            chunks_per_batch=payload.chunks_per_batch,
        )

        # If any cognify run errored return JSONResponse with proper error status code
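A hedged sketch of exercising the new field over HTTP; the base URL and the `datasets` field name are assumptions based on the surrounding router rather than lines shown in this hunk:

```python
import httpx

payload = {
    "datasets": ["my_dataset"],   # assumed field name from the existing payload DTO
    "run_in_background": False,
    "chunks_per_batch": 50,       # new optional override added in this change
}

# Add whatever auth cookie or headers your deployment requires.
response = httpx.post("http://localhost:8000/api/v1/cognify", json=payload)
print(response.status_code, response.json())
```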
@@ -6,14 +6,16 @@ from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder

from cognee.modules.search.types import SearchType, SearchResult, CombinedSearchResult
from cognee.modules.search.types import SearchType, SearchResult
from cognee.api.DTO import InDTO, OutDTO
from cognee.modules.users.exceptions.exceptions import PermissionDeniedError
from cognee.modules.users.exceptions.exceptions import PermissionDeniedError, UserNotFoundError
from cognee.modules.users.models import User
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_authenticated_user
from cognee.shared.utils import send_telemetry
from cognee import __version__ as cognee_version
from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
from cognee.exceptions import CogneeValidationError


# Note: Datasets sent by name will only map to datasets owned by the request sender

@@ -29,7 +31,7 @@ class SearchPayloadDTO(InDTO):
    node_name: Optional[list[str]] = Field(default=None, example=[])
    top_k: Optional[int] = Field(default=10)
    only_context: bool = Field(default=False)
    use_combined_context: bool = Field(default=False)
    verbose: bool = Field(default=False)


def get_search_router() -> APIRouter:

@@ -72,7 +74,7 @@ def get_search_router() -> APIRouter:
        except Exception as error:
            return JSONResponse(status_code=500, content={"error": str(error)})

    @router.post("", response_model=Union[List[SearchResult], CombinedSearchResult, List])
    @router.post("", response_model=Union[List[SearchResult], List])
    async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
        """
        Search for nodes in the graph database.

@@ -116,7 +118,7 @@ def get_search_router() -> APIRouter:
                "node_name": payload.node_name,
                "top_k": payload.top_k,
                "only_context": payload.only_context,
                "use_combined_context": payload.use_combined_context,
                "verbose": payload.verbose,
                "cognee_version": cognee_version,
            },
        )

@@ -133,11 +135,22 @@ def get_search_router() -> APIRouter:
                system_prompt=payload.system_prompt,
                node_name=payload.node_name,
                top_k=payload.top_k,
                verbose=payload.verbose,
                only_context=payload.only_context,
                use_combined_context=payload.use_combined_context,
            )

            return jsonable_encoder(results)
        except (DatabaseNotCreatedError, UserNotFoundError, CogneeValidationError) as e:
            # Return a clear 422 with actionable guidance instead of leaking a stacktrace
            status_code = getattr(e, "status_code", 422)
            return JSONResponse(
                status_code=status_code,
                content={
                    "error": "Search prerequisites not met",
                    "detail": str(e),
                    "hint": "Run `await cognee.add(...)` then `await cognee.cognify()` before searching.",
                },
            )
        except PermissionDeniedError:
            return []
        except Exception as error:
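With this handler in place, a search request made before any data has been processed receives a body shaped roughly like the following; the `detail` text varies with the underlying exception:

```json
{
  "error": "Search prerequisites not met",
  "detail": "Search prerequisites not met: no database/default user found. ...",
  "hint": "Run `await cognee.add(...)` then `await cognee.cognify()` before searching."
}
```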
@@ -4,13 +4,16 @@ from typing import Union, Optional, List, Type
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.engine.models.node_set import NodeSet
from cognee.modules.users.models import User
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
from cognee.modules.search.types import SearchResult, SearchType
from cognee.modules.users.methods import get_default_user
from cognee.modules.search.methods import search as search_function
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.exceptions import DatasetNotFoundError
from cognee.context_global_variables import set_session_user_context_variable
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
from cognee.exceptions import CogneeValidationError
from cognee.modules.users.exceptions.exceptions import UserNotFoundError

logger = get_logger()


@@ -29,11 +32,11 @@ async def search(
    save_interaction: bool = False,
    last_k: Optional[int] = 1,
    only_context: bool = False,
    use_combined_context: bool = False,
    session_id: Optional[str] = None,
    wide_search_top_k: Optional[int] = 100,
    triplet_distance_penalty: Optional[float] = 3.5,
) -> Union[List[SearchResult], CombinedSearchResult]:
    verbose: bool = False,
) -> List[SearchResult]:
    """
    Search and query the knowledge graph for insights, information, and connections.

@@ -123,6 +126,8 @@ async def search(
        session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.

        verbose: If True, returns detailed result information including graph representation (when possible).

    Returns:
        list: Search results in format determined by query_type:

@@ -176,7 +181,18 @@ async def search(
        datasets = [datasets]

    if user is None:
        user = await get_default_user()
        try:
            user = await get_default_user()
        except (DatabaseNotCreatedError, UserNotFoundError) as error:
            # Provide a clear, actionable message instead of surfacing low-level stacktraces
            raise CogneeValidationError(
                message=(
                    "Search prerequisites not met: no database/default user found. "
                    "Initialize Cognee before searching by:\n"
                    "• running `await cognee.add(...)` followed by `await cognee.cognify()`."
                ),
                name="SearchPreconditionError",
            ) from error

    await set_session_user_context_variable(user)

@@ -200,10 +216,10 @@ async def search(
        save_interaction=save_interaction,
        last_k=last_k,
        only_context=only_context,
        use_combined_context=use_combined_context,
        session_id=session_id,
        wide_search_top_k=wide_search_top_k,
        triplet_distance_penalty=triplet_distance_penalty,
        verbose=verbose,
    )

    return filtered_search_results
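The precondition message points at the standard bootstrap sequence. A minimal sketch of that happy path; the sample text, query, and `top_k` value are illustrative:

```python
import asyncio
import cognee
from cognee.modules.search.types import SearchType

async def main():
    await cognee.add("Cognee builds a knowledge graph from your documents.")
    await cognee.cognify()
    results = await cognee.search(
        query_text="What does cognee build?",
        query_type=SearchType.GRAPH_COMPLETION,
        top_k=5,
    )
    print(results)

asyncio.run(main())
```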
@@ -71,7 +71,7 @@ def get_sync_router() -> APIRouter:
         -H "Content-Type: application/json" \\
         -H "Cookie: auth_token=your-token" \\
         -d '{"dataset_ids": ["123e4567-e89b-12d3-a456-426614174000", "456e7890-e12b-34c5-d678-901234567000"]}'

    # Sync all user datasets (empty request body or null dataset_ids)
    curl -X POST "http://localhost:8000/api/v1/sync" \\
         -H "Content-Type: application/json" \\

@@ -88,7 +88,7 @@ def get_sync_router() -> APIRouter:
    - **413 Payload Too Large**: Dataset too large for current cloud plan
    - **429 Too Many Requests**: Rate limit exceeded

    ## Notes
    - Sync operations run in the background - you get an immediate response
    - Use the returned run_id to track progress (status API coming soon)
    - Large datasets are automatically chunked for efficient transfer

@@ -179,7 +179,7 @@ def get_sync_router() -> APIRouter:
    ```

    ## Example Responses

    **No running syncs:**
    ```json
    {
@@ -21,7 +21,7 @@ binary streams, then stores them in a specified dataset for further processing.

    Supported Input Types:
    - **Text strings**: Direct text content
    - **File paths**: Local file paths (absolute paths starting with "/")
    - **File URLs**: "file:///absolute/path" or "file://relative/path"
    - **S3 paths**: "s3://bucket-name/path/to/file"
    - **Lists**: Multiple files or text strings in a single call
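A short sketch of those input forms in practice; the paths, bucket name, and dataset name are illustrative:

```python
import asyncio
import cognee

async def main():
    await cognee.add("Plain text content is ingested directly.")
    await cognee.add("/absolute/path/to/report.pdf")
    await cognee.add("file:///absolute/path/to/notes.md")
    await cognee.add("s3://my-bucket/datasets/faq.txt")
    await cognee.add(["/absolute/path/a.txt", "Another text snippet"], dataset_name="mixed_inputs")

asyncio.run(main())
```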
@@ -62,6 +62,11 @@ After successful cognify processing, use `cognee search` to query the knowledge
        parser.add_argument(
            "--verbose", "-v", action="store_true", help="Show detailed progress information"
        )
        parser.add_argument(
            "--chunks-per-batch",
            type=int,
            help="Number of chunks to process per task batch (try 50 for large single documents).",
        )

    def execute(self, args: argparse.Namespace) -> None:
        try:

@@ -111,6 +116,7 @@ After successful cognify processing, use `cognee search` to query the knowledge
                chunk_size=args.chunk_size,
                ontology_file_path=args.ontology_file,
                run_in_background=args.background,
                chunks_per_batch=getattr(args, "chunks_per_batch", None),
            )
            return result
        except Exception as e:
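Illustrative invocation of the new flag; any other arguments normally passed to `cognee cognify` stay unchanged:

```bash
# Batch 50 chunks per task when cognifying a large single document
cognee cognify --chunks-per-batch 50
```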
@@ -17,7 +17,7 @@ The `cognee config` command allows you to view and modify configuration settings

    You can:
    - View all current configuration settings
    - Get specific configuration values
    - Set configuration values
    - Unset (reset to default) specific configuration values
    - Reset all configuration to defaults
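Illustrative invocations of those operations. The subcommand names mirror the bullets above, but the exact syntax should be checked against `cognee config --help`, and the key name here is only an example:

```bash
cognee config get llm_provider           # read one value
cognee config set llm_provider openai    # change one value
cognee config unset llm_provider         # reset one value to its default
```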
@@ -24,7 +24,6 @@ async def get_graph_engine() -> GraphDBInterface:
    return graph_client


@lru_cache
def create_graph_engine(
    graph_database_provider,
    graph_file_path,

@@ -35,6 +34,35 @@ def create_graph_engine(
    graph_database_port="",
    graph_database_key="",
    graph_dataset_database_handler="",
):
    """
    Wrapper function to call create graph engine with caching.
    For a detailed description, see _create_graph_engine.
    """
    return _create_graph_engine(
        graph_database_provider,
        graph_file_path,
        graph_database_url,
        graph_database_name,
        graph_database_username,
        graph_database_password,
        graph_database_port,
        graph_database_key,
        graph_dataset_database_handler,
    )


@lru_cache
def _create_graph_engine(
    graph_database_provider,
    graph_file_path,
    graph_database_url="",
    graph_database_name="",
    graph_database_username="",
    graph_database_password="",
    graph_database_port="",
    graph_database_key="",
    graph_dataset_database_handler="",
):
    """
    Create a graph engine based on the specified provider type.
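The observable behaviour is unchanged by the split: because the private function keeps `@lru_cache`, identical arguments still return the same engine instance. A small illustrative check, where the provider and path values are assumptions:

```python
args = dict(
    graph_database_provider="kuzu",       # illustrative provider
    graph_file_path="/tmp/cognee_graph",  # illustrative path
    graph_database_url="",
    graph_database_name="",
    graph_database_username="",
    graph_database_password="",
)

engine_a = create_graph_engine(**args)
engine_b = create_graph_engine(**args)
assert engine_a is engine_b  # lru_cache on the private function returns the same cached instance
```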
@@ -1,11 +1,13 @@
import os
import aiohttp
import asyncio
import requests
import base64
import hashlib
from uuid import UUID
from typing import Optional
from urllib.parse import urlparse
from cryptography.fernet import Fernet
from aiohttp import BasicAuth

from cognee.infrastructure.databases.graph import get_graph_config
from cognee.modules.users.models import User, DatasetDatabase

@@ -23,7 +25,6 @@ class Neo4jAuraDevDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):

    Quality of life improvements:
    - Allow configuration of different Neo4j Aura plans and regions.
    - Requests should be made async, currently a blocking requests library is used.
    """

    @classmethod

@@ -49,6 +50,7 @@ class Neo4jAuraDevDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
        graph_db_name = f"{dataset_id}"

        # Client credentials and encryption
        # Note: Should not be used as class variables so that they are not persisted in memory longer than needed
        client_id = os.environ.get("NEO4J_CLIENT_ID", None)
        client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None)
        tenant_id = os.environ.get("NEO4J_TENANT_ID", None)

@@ -63,22 +65,13 @@ class Neo4jAuraDevDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
                "NEO4J_CLIENT_ID, NEO4J_CLIENT_SECRET, and NEO4J_TENANT_ID environment variables must be set to use Neo4j Aura DatasetDatabase Handling."
            )

        # Make the request with HTTP Basic Auth
        def get_aura_token(client_id: str, client_secret: str) -> dict:
            url = "https://api.neo4j.io/oauth/token"
            data = {"grant_type": "client_credentials"}  # sent as application/x-www-form-urlencoded

            resp = requests.post(url, data=data, auth=(client_id, client_secret))
            resp.raise_for_status()  # raises if the request failed
            return resp.json()

        resp = get_aura_token(client_id, client_secret)
        resp_token = await cls._get_aura_token(client_id, client_secret)

        url = "https://api.neo4j.io/v1/instances"

        headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {resp['access_token']}",
            "Authorization": f"Bearer {resp_token['access_token']}",
            "Content-Type": "application/json",
        }

@@ -96,31 +89,38 @@ class Neo4jAuraDevDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
            "cloud_provider": "gcp",
        }

        response = requests.post(url, headers=headers, json=payload)
        async def _create_database_instance_request():
            async with aiohttp.ClientSession() as session:
                async with session.post(url, headers=headers, json=payload) as resp:
                    resp.raise_for_status()
                    return await resp.json()

        resp_create = await _create_database_instance_request()

        graph_db_name = "neo4j"  # Has to be 'neo4j' for Aura
        graph_db_url = response.json()["data"]["connection_url"]
        graph_db_key = resp["access_token"]
        graph_db_username = response.json()["data"]["username"]
        graph_db_password = response.json()["data"]["password"]
        graph_db_url = resp_create["data"]["connection_url"]
        graph_db_key = resp_token["access_token"]
        graph_db_username = resp_create["data"]["username"]
        graph_db_password = resp_create["data"]["password"]

        async def _wait_for_neo4j_instance_provisioning(instance_id: str, headers: dict):
            # Poll until the instance is running
            status_url = f"https://api.neo4j.io/v1/instances/{instance_id}"
            status = ""
            for attempt in range(30):  # Try for up to ~5 minutes
                status_resp = requests.get(
                    status_url, headers=headers
                )  # TODO: Use async requests with httpx
                status = status_resp.json()["data"]["status"]
                if status.lower() == "running":
                    return
                await asyncio.sleep(10)
                async with aiohttp.ClientSession() as session:
                    async with session.get(status_url, headers=headers) as resp:
                        resp.raise_for_status()
                        status_resp = await resp.json()
                status = status_resp["data"]["status"]
                if status.lower() == "running":
                    return
                await asyncio.sleep(10)
            raise TimeoutError(
                f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. Status: {status}"
            )

        instance_id = response.json()["data"]["id"]
        instance_id = resp_create["data"]["id"]
        await _wait_for_neo4j_instance_provisioning(instance_id, headers)

        encrypted_db_password_bytes = cipher.encrypt(graph_db_password.encode())

@@ -165,4 +165,39 @@ class Neo4jAuraDevDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):

    @classmethod
    async def delete_dataset(cls, dataset_database: DatasetDatabase):
        pass
        # Get dataset database information and credentials
        dataset_database = await cls.resolve_dataset_connection_info(dataset_database)

        parsed_url = urlparse(dataset_database.graph_database_url)
        instance_id = parsed_url.hostname.split(".")[0]

        url = f"https://api.neo4j.io/v1/instances/{instance_id}"

        # Get access token for Neo4j Aura API
        # Client credentials
        client_id = os.environ.get("NEO4J_CLIENT_ID", None)
        client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None)
        resp = await cls._get_aura_token(client_id, client_secret)

        headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {resp['access_token']}",
            "Content-Type": "application/json",
        }

        async with aiohttp.ClientSession() as session:
            async with session.delete(url, headers=headers) as resp:
                resp.raise_for_status()
                return await resp.json()

    @classmethod
    async def _get_aura_token(cls, client_id: str, client_secret: str) -> dict:
        url = "https://api.neo4j.io/oauth/token"
        data = {"grant_type": "client_credentials"}  # sent as application/x-www-form-urlencoded

        async with aiohttp.ClientSession() as session:
            async with session.post(
                url, data=data, auth=BasicAuth(client_id, client_secret)
            ) as resp:
                resp.raise_for_status()
                return await resp.json()
@@ -290,7 +290,7 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
        query_string = f"""
            CALL neptune.algo.vectors.topKByEmbeddingWithFiltering({{
                topK: {limit},
                embedding: {embedding},
                nodeFilter: {{ equals: {{property: '{self._COLLECTION_PREFIX}', value: '{collection_name}'}} }}
            }}
            )

@@ -299,7 +299,7 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):

        if with_vector:
            query_string += """
            WITH node, score, id(node) as node_id
            MATCH (n)
            WHERE id(n) = id(node)
            CALL neptune.algo.vectors.get(n)
@@ -7,7 +7,6 @@ from cognee.infrastructure.databases.graph.config import get_graph_context_confi
from functools import lru_cache


@lru_cache
def create_vector_engine(
    vector_db_provider: str,
    vector_db_url: str,

@@ -15,6 +14,29 @@ def create_vector_engine(
    vector_db_port: str = "",
    vector_db_key: str = "",
    vector_dataset_database_handler: str = "",
):
    """
    Wrapper function to call create vector engine with caching.
    For a detailed description, see _create_vector_engine.
    """
    return _create_vector_engine(
        vector_db_provider,
        vector_db_url,
        vector_db_name,
        vector_db_port,
        vector_db_key,
        vector_dataset_database_handler,
    )


@lru_cache
def _create_vector_engine(
    vector_db_provider: str,
    vector_db_url: str,
    vector_db_name: str,
    vector_db_port: str = "",
    vector_db_key: str = "",
    vector_dataset_database_handler: str = "",
):
    """
    Create a vector database engine based on the specified provider.
@@ -14,6 +14,8 @@ from tenacity import (
)
import litellm
import os
from urllib.parse import urlparse
import httpx
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions import EmbeddingException
from cognee.infrastructure.llm.tokenizer.HuggingFace import (

@@ -79,10 +81,26 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
        enable_mocking = str(enable_mocking).lower()
        self.mock = enable_mocking in ("true", "1", "yes")

        # Validate provided custom embedding endpoint early to avoid long hangs later
        if self.endpoint:
            try:
                parsed = urlparse(self.endpoint)
            except Exception:
                parsed = None
            if not parsed or parsed.scheme not in ("http", "https") or not parsed.netloc:
                logger.error(
                    "Invalid EMBEDDING_ENDPOINT configured: '%s'. Expected a URL starting with http:// or https://",
                    str(self.endpoint),
                )
                raise EmbeddingException(
                    "Invalid EMBEDDING_ENDPOINT. Please set a valid URL (e.g., https://host:port) "
                    "via environment variable EMBEDDING_ENDPOINT."
                )

    @retry(
        stop=stop_after_delay(128),
        stop=stop_after_delay(30),
        wait=wait_exponential_jitter(2, 128),
        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
        retry=retry_if_not_exception_type((litellm.exceptions.NotFoundError, EmbeddingException)),
        before_sleep=before_sleep_log(logger, logging.DEBUG),
        reraise=True,
    )

@@ -111,12 +129,16 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
            return [data["embedding"] for data in response["data"]]
        else:
            async with embedding_rate_limiter_context_manager():
                response = await litellm.aembedding(
                    model=self.model,
                    input=text,
                    api_key=self.api_key,
                    api_base=self.endpoint,
                    api_version=self.api_version,
                # Ensure each attempt does not hang indefinitely
                response = await asyncio.wait_for(
                    litellm.aembedding(
                        model=self.model,
                        input=text,
                        api_key=self.api_key,
                        api_base=self.endpoint,
                        api_version=self.api_version,
                    ),
                    timeout=30.0,
                )

            return [data["embedding"] for data in response.data]

@@ -154,6 +176,27 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
            logger.error("Context window exceeded for embedding text: %s", str(error))
            raise error

        except asyncio.TimeoutError as e:
            # Per-attempt timeout – likely an unreachable endpoint
            logger.error(
                "Embedding endpoint timed out. EMBEDDING_ENDPOINT='%s'. "
                "Verify that the endpoint is reachable and correct.",
                str(self.endpoint),
            )
            raise EmbeddingException(
                "Embedding request timed out. Check EMBEDDING_ENDPOINT connectivity."
            ) from e

        except (httpx.ConnectError, httpx.ReadTimeout) as e:
            logger.error(
                "Failed to connect to embedding endpoint. EMBEDDING_ENDPOINT='%s'. "
                "Ensure the URL is correct and the server is running.",
                str(self.endpoint),
            )
            raise EmbeddingException(
                "Cannot connect to embedding endpoint. Check EMBEDDING_ENDPOINT."
            ) from e

        except (
            litellm.exceptions.BadRequestError,
            litellm.exceptions.NotFoundError,

@@ -162,8 +205,15 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
            raise EmbeddingException(f"Failed to index data points using model {self.model}") from e

        except Exception as error:
            logger.error("Error embedding text: %s", str(error))
            raise error
            # Fall back to a clear, actionable message for connectivity/misconfiguration issues
            logger.error(
                "Error embedding text: %s. EMBEDDING_ENDPOINT='%s'.",
                str(error),
                str(self.endpoint),
            )
            raise EmbeddingException(
                "Embedding failed due to an unexpected error. Verify EMBEDDING_ENDPOINT and provider settings."
            ) from error

    def get_vector_size(self) -> int:
        """
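The early validation only accepts a full http/https URL, so a bare host:port now fails fast instead of hanging for minutes. An illustrative configuration, where the endpoint value is an example rather than a documented default:

```python
import os

# Accepted: scheme + host, as required by the early validation
os.environ["EMBEDDING_ENDPOINT"] = "http://localhost:11434/v1"

# Rejected (raises EmbeddingException at engine construction): missing scheme
# os.environ["EMBEDDING_ENDPOINT"] = "localhost:11434"
```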
@@ -10,4 +10,4 @@ Extraction rules:
5. Current-time references ("now", "current", "today"): If the query explicitly refers to the present, set both starts_at and ends_at to now (the ingestion timestamp).
6. "Who is" and "Who was" questions: These imply a general identity or biographical inquiry without a specific temporal scope. Set both starts_at and ends_at to None.
7. Ordering rule: Always ensure the earlier date is assigned to starts_at and the later date to ends_at.
8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
@@ -22,4 +22,4 @@ The `attributes` should be a list of dictionaries, each containing:
- Relationships should be technical with one or at most two words. If two words, use underscore camelcase style
- Relationships could imply general meaning like: subject, object, participant, recipient, agent, instrument, tool, source, cause, effect, purpose, manner, resource, etc.
- You can combine two words to form a relationship name: subject_role, previous_owner, etc.
- Focus on how the entity specifically relates to the event
@@ -27,4 +27,4 @@ class Event(BaseModel):
    time_from: Optional[Timestamp] = None
    time_to: Optional[Timestamp] = None
    location: Optional[str] = None
```
@@ -19,8 +19,8 @@ The aim is to achieve simplicity and clarity in the knowledge graph.
- **Naming Convention**: Use snake_case for relationship names, e.g., `acted_in`.
# 3. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the Persons ID.
If an entity is mentioned multiple times in the text but is referred to by different names or pronouns,
always use the most complete identifier for that entity throughout the knowledge graph.
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
# 4. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination.
@@ -22,7 +22,7 @@ You are an advanced algorithm designed to extract structured information to buil
3. **Coreference Resolution**:
   - Maintain one consistent node ID for each real-world entity.
   - Resolve aliases, acronyms, and pronouns to the most complete form.
   - *Example*: Always use "John Doe" even if later referred to as "Doe" or "he".
   - *Example*: Always use the full identifier even if the entity is later referred to in a similar but slightly different way.

**Property & Data Guidelines**:
@@ -42,10 +42,10 @@ You are an advanced algorithm designed to extract structured information from un
- **Rule**: Resolve all aliases, acronyms, and pronouns to one canonical identifier.

> **One-Shot Example**:
> **Input**: "John Doe is an author. Later, Doe published a book. He is well-known."
> **Input**: "X is an author. Later, Doe published a book. He is well-known."
> **Output Node**:
> ```
> John Doe (Person)
> X (Person)
> ```

---
@@ -15,7 +15,7 @@ You are an advanced algorithm that extracts structured data into a knowledge gra
- Properties are key-value pairs; do not use escaped quotes.

3. **Coreference Resolution**
   - Use a single, complete identifier for each entity (e.g., always "John Doe" not "Joe" or "he").
   - Use a single, complete identifier for each entity

4. **Relationship Labels**:
   - Use descriptive, lowercase, snake_case names for edges.
@@ -26,7 +26,7 @@ Use **basic atomic types** for node labels. Always prefer general types over spe
- Good: "Alan Turing", "Google Inc.", "World War II"
- Bad: "Entity_001", "1234", "he", "they"
- Never use numeric or autogenerated IDs.
- Prioritize **most complete form** of entity names for consistency (e.g., always use "John Doe" instead of "John" or "he").
- Prioritize **most complete form** of entity names for consistency

2. Dates, Numbers, and Properties
---------------------------------
@@ -2,12 +2,12 @@ You are an expert query analyzer for a **GraphRAG system**. Your primary goal is

Here are the available `SearchType` tools and their specific functions:

- **`SUMMARIES`**: The `SUMMARIES` search type retrieves summarized information from the knowledge graph.

  **Best for:**

  - Getting concise overviews of topics
  - Summarizing large amounts of information
  - Quick understanding of complex subjects

  **Best for:**

@@ -16,7 +16,7 @@ Here are the available `SearchType` tools and their specific functions:
  - Understanding relationships between concepts
  - Exploring the structure of your knowledge graph

* **`CHUNKS`**: The `CHUNKS` search type retrieves specific facts and information chunks from the knowledge graph.

  **Best for:**

@@ -122,4 +122,4 @@ Response: `NATURAL_LANGUAGE`


Your response MUST be a single word, consisting of only the chosen `SearchType` name. Do not provide any explanation.

@@ -1 +1 @@
Respond with: test
@@ -34,6 +34,7 @@ class LLMProvider(Enum):
    GEMINI = "gemini"
    MISTRAL = "mistral"
    BEDROCK = "bedrock"
    LLAMA_CPP = "llama_cpp"


def get_llm_client(raise_api_key_error: bool = True):

@@ -187,5 +188,28 @@ def get_llm_client(raise_api_key_error: bool = True):
            instructor_mode=llm_config.llm_instructor_mode.lower(),
        )

    elif provider == LLMProvider.LLAMA_CPP:
        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llama_cpp.adapter import (
            LlamaCppAPIAdapter,
        )

        # Get optional local mode parameters (will be None if not set)
        # TODO: refactor llm_config to include these parameters, currently they cannot be defined and defaults are used
        model_path = getattr(llm_config, "llama_cpp_model_path", None)
        n_ctx = getattr(llm_config, "llama_cpp_n_ctx", 2048)
        n_gpu_layers = getattr(llm_config, "llama_cpp_n_gpu_layers", 0)
        chat_format = getattr(llm_config, "llama_cpp_chat_format", "chatml")

        return LlamaCppAPIAdapter(
            model=llm_config.llm_model,
            max_completion_tokens=max_completion_tokens,
            instructor_mode=llm_config.llm_instructor_mode.lower(),
            endpoint=llm_config.llm_endpoint,
            api_key=llm_config.llm_api_key,
            model_path=model_path,
            n_ctx=n_ctx,
            n_gpu_layers=n_gpu_layers,
            chat_format=chat_format,
        )
    else:
        raise UnsupportedLLMProviderError(provider)
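To route cognee through this provider in server mode, the usual LLM_* settings should point at a running llama-cpp-python server. The variable names follow cognee's existing LLM configuration and the values are illustrative:

```python
import os

os.environ["LLM_PROVIDER"] = "llama_cpp"
os.environ["LLM_MODEL"] = "my-local-gguf-model"          # name the server exposes; illustrative
os.environ["LLM_ENDPOINT"] = "http://localhost:8080/v1"  # llama-cpp-python OpenAI-compatible endpoint
os.environ["LLM_API_KEY"] = "not-needed-locally"         # placeholder; local servers often ignore it
```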
@@ -0,0 +1,191 @@
"""Adapter for Instructor-backed Structured Output Framework for Llama CPP"""

import litellm
import logging
import instructor
from typing import Type, Optional
from openai import AsyncOpenAI
from pydantic import BaseModel

from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
    LLMInterface,
)
from cognee.shared.logging_utils import get_logger
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager

from tenacity import (
    retry,
    stop_after_delay,
    wait_exponential_jitter,
    retry_if_not_exception_type,
    before_sleep_log,
)

logger = get_logger()


class LlamaCppAPIAdapter(LLMInterface):
    """
    Adapter for Llama CPP LLM provider with support for TWO modes:

    1. SERVER MODE (OpenAI-compatible):
       - Connects to llama-cpp-python server via HTTP (local or remote)
       - Uses instructor.from_openai()
       - Requires: endpoint, api_key, model

    2. LOCAL MODE (In-process):
       - Loads model directly using llama-cpp-python library
       - Uses instructor.patch() on llama.Llama object
       - Requires: model_path

    Public methods:
    - acreate_structured_output

    Instance variables:
    - name
    - model (for server mode) or model_path (for local mode)
    - mode_type: "server" or "local"
    - max_completion_tokens
    - aclient
    """

    name: str
    model: Optional[str]
    model_path: Optional[str]
    mode_type: str  # "server" or "local"
    default_instructor_mode = instructor.Mode.JSON

    def __init__(
        self,
        name: str = "LlamaCpp",
        max_completion_tokens: int = 2048,
        instructor_mode: Optional[str] = None,
        # Server mode parameters
        endpoint: Optional[str] = None,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
        # Local mode parameters
        model_path: Optional[str] = None,
        n_ctx: int = 2048,
        n_gpu_layers: int = 0,
        chat_format: str = "chatml",
    ):
        self.name = name
        self.max_completion_tokens = max_completion_tokens
        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode

        # Determine which mode to use
        if model_path:
            self._init_local_mode(model_path, n_ctx, n_gpu_layers, chat_format)
        elif endpoint:
            self._init_server_mode(endpoint, api_key, model)
        else:
            raise ValueError(
                "Must provide either 'model_path' (for local mode) or 'endpoint' (for server mode)"
            )

    def _init_local_mode(self, model_path: str, n_ctx: int, n_gpu_layers: int, chat_format: str):
        """Initialize local mode using llama-cpp-python library directly"""
        try:
            import llama_cpp
        except ImportError:
            raise ImportError(
                "llama-cpp-python is not installed. Install with: pip install llama-cpp-python"
            )

        logger.info(f"Initializing LlamaCpp in LOCAL mode with model: {model_path}")

        self.mode_type = "local"
        self.model_path = model_path
        self.model = None

        # Initialize llama-cpp-python with the model
        self.llama = llama_cpp.Llama(
            model_path=model_path,
            n_gpu_layers=n_gpu_layers,  # -1 for all GPU, 0 for CPU only
            chat_format=chat_format,
            n_ctx=n_ctx,
            verbose=False,
        )

        self.aclient = instructor.patch(
            create=self.llama.create_chat_completion_openai_v1,
            mode=instructor.Mode(self.instructor_mode),
        )

    def _init_server_mode(self, endpoint: str, api_key: Optional[str], model: Optional[str]):
        """Initialize server mode connecting to llama-cpp-python server"""
        logger.info(f"Initializing LlamaCpp in SERVER mode with endpoint: {endpoint}")

        self.mode_type = "server"
        self.model = model
        self.model_path = None
        self.endpoint = endpoint
        self.api_key = api_key

        # Use instructor.from_openai() for server mode (OpenAI-compatible API)
        self.aclient = instructor.from_openai(
            AsyncOpenAI(base_url=self.endpoint, api_key=self.api_key),
            mode=instructor.Mode(self.instructor_mode),
        )

    @retry(
        stop=stop_after_delay(128),
        wait=wait_exponential_jitter(8, 128),
        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
        before_sleep=before_sleep_log(logger, logging.DEBUG),
        reraise=True,
    )
    async def acreate_structured_output(
        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
    ) -> BaseModel:
        """
        Generate a structured output from the LLM using the provided text and system prompt.

        Works in both local and server modes transparently.

        Parameters:
        -----------
        - text_input (str): The input text provided by the user.
        - system_prompt (str): The system prompt that guides the response generation.
        - response_model (Type[BaseModel]): The model type that the response should conform to.

        Returns:
        --------
        - BaseModel: A structured output that conforms to the specified response model.
        """
        async with llm_rate_limiter_context_manager():
            # Prepare messages (system first, then user is more standard)
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text_input},
            ]

            if self.mode_type == "server":
                # Server mode: use async client with OpenAI-compatible API
                response = await self.aclient.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    response_model=response_model,
                    max_retries=2,
                    max_completion_tokens=self.max_completion_tokens,
                    **kwargs,
                )

            else:
                import asyncio

                # Local mode: instructor.patch() returns a SYNC callable
                # Per docs: https://python.useinstructor.com/integrations/llama-cpp-python/
                def _call_sync():
                    return self.aclient(
                        messages=messages,
                        response_model=response_model,
                        max_tokens=self.max_completion_tokens,
                        **kwargs,
                    )

                # Run sync function in thread pool to avoid blocking
                response = await asyncio.to_thread(_call_sync)

            return response
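A short usage sketch of the new adapter in both modes; the model name, endpoint, and GGUF path below are placeholders, and in normal use the adapter is constructed for you by `get_llm_client()`:

```python
import asyncio
from pydantic import BaseModel

class Person(BaseModel):
    name: str
    age: int

async def extract(adapter):
    return await adapter.acreate_structured_output(
        text_input="Alice is 30 years old.",
        system_prompt="Extract the person mentioned in the text.",
        response_model=Person,
    )

# Server mode: a llama-cpp-python server exposing an OpenAI-compatible API (URL is illustrative).
server_adapter = LlamaCppAPIAdapter(endpoint="http://localhost:8080/v1", api_key="sk-local", model="llama-3")

# Local mode: load a GGUF file in-process (path is illustrative; requires llama-cpp-python).
local_adapter = LlamaCppAPIAdapter(model_path="/models/llama-3-8b-instruct.gguf", n_gpu_layers=-1)

print(asyncio.run(extract(server_adapter)))
```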
@@ -9,6 +9,7 @@ class CognifyConfig(BaseSettings):
    classification_model: object = DefaultContentPrediction
    summarization_model: object = SummarizedContent
    triplet_embedding: bool = False
    chunks_per_batch: Optional[int] = None
    model_config = SettingsConfigDict(env_file=".env", extra="allow")

    def to_dict(self) -> dict:

@@ -16,6 +17,7 @@ class CognifyConfig(BaseSettings):
            "classification_model": self.classification_model,
            "summarization_model": self.summarization_model,
            "triplet_embedding": self.triplet_embedding,
            "chunks_per_batch": self.chunks_per_batch,
        }
@@ -15,3 +15,9 @@ async def setup():
    """
    await create_relational_db_and_tables()
    await create_pgvector_db_and_tables()


if __name__ == "__main__":
    import asyncio

    asyncio.run(setup())
@@ -215,9 +215,6 @@ class CogneeGraph(CogneeAbstractGraph):
                        edge_penalty=triplet_distance_penalty,
                    )
                    self.add_edge(edge)

                    source_node.add_skeleton_edge(edge)
                    target_node.add_skeleton_edge(edge)
                else:
                    raise EntityNotFoundError(
                        message=f"Edge references nonexistent nodes: {source_id} -> {target_id}"
Some files were not shown because too many files have changed in this diff.