Compare commits
173 commits
main
...
feat/delet
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
06dda4e4b4 | ||
|
|
a0f25f4f50 | ||
|
|
8965e31a58 | ||
|
|
e5341c5f49 | ||
|
|
dd639fa967 | ||
|
|
d578971b60 | ||
|
|
27f2aa03b3 | ||
|
|
310e9e97ae | ||
|
|
5f8a3e24bd | ||
|
|
f6c76ce19e | ||
|
|
c3cec818d7 | ||
|
|
9808077b4c | ||
|
|
9b2b1a9c13 | ||
|
|
16cf955497 | ||
|
|
2c4f9b07ac | ||
|
|
a85df53c74 | ||
|
|
3bc3f63362 | ||
|
|
72dae0f79a | ||
|
|
7cf93ea79d | ||
|
|
8214bdce5b | ||
|
|
4b71995a70 | ||
|
|
8a490b1c16 | ||
|
|
9819b38058 | ||
|
|
eb444ca18f | ||
|
|
3055ed89c8 | ||
|
|
ee967ae3fa | ||
|
|
976ac78e5e | ||
|
|
ef7ebc0748 | ||
|
|
3311db55bf | ||
|
|
672a776df5 | ||
|
|
edb541505c | ||
|
|
3e47de5ea0 | ||
|
|
9c04f46572 | ||
|
|
ef51dcfb7a | ||
|
|
4f07adee66 | ||
|
|
a70ce2785b | ||
|
|
d127381262 | ||
|
|
f93d414e94 | ||
|
|
c1ea7a8cc2 | ||
|
|
8602ba1e93 | ||
|
|
4d03fcfa9e | ||
|
|
2ef8094666 | ||
|
|
d352ff0c28 | ||
|
|
6e5e79f434 | ||
|
|
46ff01021a | ||
|
|
69ab8e7ede | ||
|
|
cc7ca45e73 | ||
|
|
929d88557e | ||
|
|
431a83247f | ||
|
|
6958b4edd4 | ||
|
|
a5a7ae2564 | ||
|
|
601f74db4f | ||
|
|
e92d8f57b5 | ||
|
|
d8b4411aac | ||
|
|
f79ba53e1d | ||
|
|
999e6c0981 | ||
|
|
b0454b49a9 | ||
|
|
94d5175570 | ||
|
|
623126eec1 | ||
|
|
cc872fc8de | ||
|
|
233afdd0a9 | ||
|
|
b77961b0f1 | ||
|
|
8f8f4c0b63 | ||
|
|
56b03c89f3 | ||
|
|
aeda1d8eba | ||
|
|
1500b1c693 | ||
|
|
18d0a41850 | ||
|
|
4ff2a35476 | ||
|
|
8027263e8b | ||
|
|
f27d07d902 | ||
|
|
789fa90790 | ||
|
|
3e041ec12f | ||
|
|
f2cb68dd5e | ||
|
|
d92d6b9d8f | ||
|
|
7892b48afe | ||
|
|
a52873a71f | ||
|
|
48c2040f3d | ||
|
|
757d5fca65 | ||
|
|
13c034e2e4 | ||
|
|
89ef7d7d15 | ||
|
|
c61ff60e40 | ||
|
|
0d5b284147 | ||
|
|
12e6ad152e | ||
|
|
aad4d0cdde | ||
|
|
bad9f09f4c | ||
|
|
42e711fe3a | ||
|
|
412b6467da | ||
|
|
9a509e4e2e | ||
|
|
7f1a51fcdc | ||
|
|
646894d7c5 | ||
|
|
21407dd9ed | ||
|
|
b4aaa7faef | ||
|
|
4e8845c117 | ||
|
|
fa035f42f4 | ||
|
|
7cf6f08283 | ||
|
|
fd23c75c09 | ||
|
|
0cde551226 | ||
|
|
0f4cf15d58 | ||
|
|
41edeb0cf8 | ||
|
|
cd60ae3174 | ||
|
|
0d96606fb2 | ||
|
|
2485c3f5f0 | ||
|
|
f48df27fc8 | ||
|
|
6260f9eb82 | ||
|
|
e654bcb081 | ||
|
|
829a6f0d04 | ||
|
|
2067c459e3 | ||
|
|
4d0f132822 | ||
|
|
ab20443330 | ||
|
|
d57d188459 | ||
|
|
8e5f14da78 | ||
|
|
7972e39653 | ||
|
|
6ecf719632 | ||
|
|
2de1bd977d | ||
|
|
032a74a409 | ||
|
|
28faf7ce04 | ||
|
|
e1d313a46b | ||
|
|
654a573454 | ||
|
|
ec744f01cc | ||
|
|
b3f9795d2e | ||
|
|
f26b490a8f | ||
|
|
1f98d50870 | ||
|
|
a7da9c7d65 | ||
|
|
4f3a1bcf01 | ||
|
|
5f00abf3e4 | ||
|
|
3f53534c99 | ||
|
|
c892265644 | ||
|
|
f9b16e508d | ||
|
|
6a4d31356b | ||
|
|
3cda1af29d | ||
|
|
73d84129de | ||
|
|
57195fb5a1 | ||
|
|
12ce80005c | ||
|
|
09fbf22768 | ||
|
|
02b1778658 | ||
|
|
5cb6510205 | ||
|
|
d6da7a999b | ||
|
|
b52c1a1e25 | ||
|
|
53d3b50f93 | ||
|
|
43290af1b2 | ||
|
|
a072773995 | ||
|
|
5fac3b40b9 | ||
|
|
7bd7079aac | ||
|
|
a451fb8c5a | ||
|
|
8ea83e4a26 | ||
|
|
82d48663bb | ||
|
|
1e56d6dc38 | ||
|
|
503bdc34f3 | ||
|
|
b328aeff12 | ||
|
|
ac3300760b | ||
|
|
d351c9a009 | ||
|
|
84bd2f38f7 | ||
|
|
84c8e07ddd | ||
|
|
85a2bac062 | ||
|
|
ce4a5c8311 | ||
|
|
b327756e5f | ||
|
|
bd71540d75 | ||
|
|
fdf037b3d0 | ||
|
|
c5f0c4af87 | ||
|
|
ff263c0132 | ||
|
|
9041a804ec | ||
|
|
3c0e915812 | ||
|
|
d34fd9237b | ||
|
|
90d10e6f9a | ||
|
|
5080e8f8a5 | ||
|
|
f1afd1f0a2 | ||
|
|
4b43afcdab | ||
|
|
6f06e4a5eb | ||
|
|
13396871c9 | ||
|
|
5f6f0502c8 | ||
|
|
7d4804ff7b | ||
|
|
3f27c5592b | ||
|
|
3372679f7b |
200 changed files with 27122 additions and 8883 deletions
|
|
@ -91,6 +91,15 @@ DB_NAME=cognee_db
|
|||
#DB_USERNAME=cognee
|
||||
#DB_PASSWORD=cognee
|
||||
|
||||
# -- Advanced: Custom database connection arguments (optional) ---------------
|
||||
# Pass additional connection parameters as JSON. Useful for SSL, timeouts, etc.
|
||||
# Examples:
|
||||
# For PostgreSQL with SSL:
|
||||
# DATABASE_CONNECT_ARGS='{"sslmode": "require", "connect_timeout": 10}'
|
||||
# For SQLite with custom timeout:
|
||||
# DATABASE_CONNECT_ARGS='{"timeout": 60}'
|
||||
#DATABASE_CONNECT_ARGS='{}'
|
||||
|
||||
################################################################################
|
||||
# 🕸️ Graph Database settings
|
||||
################################################################################
|
||||
|
|
|
|||
27
.github/workflows/e2e_tests.yml
vendored
27
.github/workflows/e2e_tests.yml
vendored
|
|
@ -288,7 +288,7 @@ jobs:
|
|||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_permissions.py
|
||||
run: uv run pytest cognee/tests/test_permissions.py -v --log-level=INFO
|
||||
|
||||
test-multi-tenancy:
|
||||
name: Test multi tenancy with different situations in Cognee
|
||||
|
|
@ -315,6 +315,31 @@ jobs:
|
|||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_multi_tenancy.py
|
||||
|
||||
test-data-label:
|
||||
name: Test adding of label for data in Cognee
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Run custom data label test
|
||||
env:
|
||||
ENV: 'dev'
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_custom_data_label.py
|
||||
|
||||
test-graph-edges:
|
||||
name: Test graph edge ingestion
|
||||
runs-on: ubuntu-22.04
|
||||
|
|
|
|||
61
.github/workflows/examples_tests.yml
vendored
61
.github/workflows/examples_tests.yml
vendored
|
|
@ -257,7 +257,7 @@ jobs:
|
|||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Run Memify Tests
|
||||
- name: Run Permissions Example
|
||||
env:
|
||||
ENV: 'dev'
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
|
|
@ -270,6 +270,65 @@ jobs:
|
|||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./examples/python/permissions_example.py
|
||||
|
||||
test-s3-permissions-example: # Make sure permission and multi-user mode work with S3 file system
|
||||
name: Run Permissions Example
|
||||
runs-on: ubuntu-22.04
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
services:
|
||||
postgres: # Using postgres to avoid storing and using SQLite from S3
|
||||
image: pgvector/pgvector:pg17
|
||||
env:
|
||||
POSTGRES_USER: cognee
|
||||
POSTGRES_PASSWORD: cognee
|
||||
POSTGRES_DB: cognee_db
|
||||
options: >-
|
||||
--health-cmd pg_isready
|
||||
--health-interval 10s
|
||||
--health-timeout 5s
|
||||
--health-retries 5
|
||||
ports:
|
||||
- 5432:5432
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
extra-dependencies: "postgres aws"
|
||||
|
||||
- name: Run S3 Permissions Example
|
||||
env:
|
||||
ENV: 'dev'
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
STORAGE_BACKEND: 's3'
|
||||
AWS_REGION: eu-west-1
|
||||
AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }}
|
||||
STORAGE_BUCKET_NAME: github-runner-cognee-tests
|
||||
DATA_ROOT_DIRECTORY: "s3://github-runner-cognee-tests/cognee/data"
|
||||
SYSTEM_ROOT_DIRECTORY: "s3://github-runner-cognee-tests/cognee/system"
|
||||
DB_PROVIDER: 'postgres'
|
||||
DB_NAME: 'cognee_db'
|
||||
DB_HOST: '127.0.0.1'
|
||||
DB_PORT: 5432
|
||||
DB_USERNAME: cognee
|
||||
DB_PASSWORD: cognee
|
||||
run: uv run python ./examples/python/permissions_example.py
|
||||
|
||||
test_docling_add:
|
||||
name: Run Add with Docling Test
|
||||
runs-on: macos-15
|
||||
|
|
|
|||
28
.github/workflows/release.yml
vendored
28
.github/workflows/release.yml
vendored
|
|
@ -136,3 +136,31 @@ jobs:
|
|||
flavour=${{ inputs.flavour }}
|
||||
cache-from: type=registry,ref=cognee/cognee:buildcache
|
||||
cache-to: type=registry,ref=cognee/cognee:buildcache,mode=max
|
||||
|
||||
trigger-docs-test-suite:
|
||||
needs: release-pypi-package
|
||||
if: ${{ inputs.flavour == 'main' }}
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Trigger docs tests
|
||||
run: |
|
||||
curl -L -X POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.REPO_DISPATCH_PAT_TOKEN }}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
https://api.github.com/repos/topoteretes/cognee-docs/dispatches \
|
||||
-d '{"event_type":"new-main-release","client_payload":{"caller_repo":"'"${GITHUB_REPOSITORY}"'"}}'
|
||||
|
||||
trigger-community-test-suite:
|
||||
needs: release-pypi-package
|
||||
if: ${{ inputs.flavour == 'main' }}
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Trigger community tests
|
||||
run: |
|
||||
curl -L -X POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.REPO_DISPATCH_PAT_TOKEN }}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
https://api.github.com/repos/topoteretes/cognee-community/dispatches \
|
||||
-d '{"event_type":"new-main-release","client_payload":{"caller_repo":"'"${GITHUB_REPOSITORY}"'"}}'
|
||||
3
.github/workflows/release_test.yml
vendored
3
.github/workflows/release_test.yml
vendored
|
|
@ -14,4 +14,5 @@ jobs:
|
|||
load-tests:
|
||||
name: Load Tests
|
||||
uses: ./.github/workflows/load_tests.yml
|
||||
secrets: inherit
|
||||
secrets: inherit
|
||||
|
||||
|
|
|
|||
46
.github/workflows/search_db_tests.yml
vendored
46
.github/workflows/search_db_tests.yml
vendored
|
|
@ -11,12 +11,21 @@ on:
|
|||
type: string
|
||||
default: "all"
|
||||
description: "Which vector databases to test (comma-separated list or 'all')"
|
||||
python-versions:
|
||||
required: false
|
||||
type: string
|
||||
default: '["3.10", "3.11", "3.12", "3.13"]'
|
||||
description: "Python versions to test (JSON array)"
|
||||
|
||||
jobs:
|
||||
run-kuzu-lance-sqlite-search-tests:
|
||||
name: Search test for Kuzu/LanceDB/Sqlite
|
||||
name: Search test for Kuzu/LanceDB/Sqlite (Python ${{ matrix.python-version }})
|
||||
runs-on: ubuntu-22.04
|
||||
if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'kuzu/lance/sqlite') }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
|
|
@ -26,7 +35,7 @@ jobs:
|
|||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Dependencies already installed
|
||||
run: echo "Dependencies already installed in setup"
|
||||
|
|
@ -45,13 +54,16 @@ jobs:
|
|||
GRAPH_DATABASE_PROVIDER: 'kuzu'
|
||||
VECTOR_DB_PROVIDER: 'lancedb'
|
||||
DB_PROVIDER: 'sqlite'
|
||||
run: uv run python ./cognee/tests/test_search_db.py
|
||||
run: uv run pytest cognee/tests/test_search_db.py -v --log-level=INFO
|
||||
|
||||
run-neo4j-lance-sqlite-search-tests:
|
||||
name: Search test for Neo4j/LanceDB/Sqlite
|
||||
name: Search test for Neo4j/LanceDB/Sqlite (Python ${{ matrix.python-version }})
|
||||
runs-on: ubuntu-22.04
|
||||
if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j/lance/sqlite') }}
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
|
|
@ -61,7 +73,7 @@ jobs:
|
|||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Setup Neo4j with GDS
|
||||
uses: ./.github/actions/setup_neo4j
|
||||
|
|
@ -88,12 +100,16 @@ jobs:
|
|||
GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }}
|
||||
GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }}
|
||||
GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }}
|
||||
run: uv run python ./cognee/tests/test_search_db.py
|
||||
run: uv run pytest cognee/tests/test_search_db.py -v --log-level=INFO
|
||||
|
||||
run-kuzu-pgvector-postgres-search-tests:
|
||||
name: Search test for Kuzu/PGVector/Postgres
|
||||
name: Search test for Kuzu/PGVector/Postgres (Python ${{ matrix.python-version }})
|
||||
runs-on: ubuntu-22.04
|
||||
if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'kuzu/pgvector/postgres') }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
fail-fast: false
|
||||
services:
|
||||
postgres:
|
||||
image: pgvector/pgvector:pg17
|
||||
|
|
@ -117,7 +133,7 @@ jobs:
|
|||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
extra-dependencies: "postgres"
|
||||
|
||||
- name: Dependencies already installed
|
||||
|
|
@ -143,12 +159,16 @@ jobs:
|
|||
DB_PORT: 5432
|
||||
DB_USERNAME: cognee
|
||||
DB_PASSWORD: cognee
|
||||
run: uv run python ./cognee/tests/test_search_db.py
|
||||
run: uv run pytest cognee/tests/test_search_db.py -v --log-level=INFO
|
||||
|
||||
run-neo4j-pgvector-postgres-search-tests:
|
||||
name: Search test for Neo4j/PGVector/Postgres
|
||||
name: Search test for Neo4j/PGVector/Postgres (Python ${{ matrix.python-version }})
|
||||
runs-on: ubuntu-22.04
|
||||
if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j/pgvector/postgres') }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
fail-fast: false
|
||||
services:
|
||||
postgres:
|
||||
image: pgvector/pgvector:pg17
|
||||
|
|
@ -172,7 +192,7 @@ jobs:
|
|||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
extra-dependencies: "postgres"
|
||||
|
||||
- name: Setup Neo4j with GDS
|
||||
|
|
@ -205,4 +225,4 @@ jobs:
|
|||
DB_PORT: 5432
|
||||
DB_USERNAME: cognee
|
||||
DB_PASSWORD: cognee
|
||||
run: uv run python ./cognee/tests/test_search_db.py
|
||||
run: uv run pytest cognee/tests/test_search_db.py -v --log-level=INFO
|
||||
|
|
|
|||
|
|
@ -97,6 +97,21 @@ git checkout -b feature/your-feature-name
|
|||
python cognee/cognee/tests/test_library.py
|
||||
```
|
||||
|
||||
### Running Simple Example
|
||||
|
||||
Change .env.example into .env and provide your OPENAI_API_KEY as LLM_API_KEY
|
||||
|
||||
Make sure to run ```shell uv sync ``` in the root cloned folder or set up a virtual environment to run cognee
|
||||
|
||||
```shell
|
||||
python cognee/cognee/examples/python/simple_example.py
|
||||
```
|
||||
or
|
||||
|
||||
```shell
|
||||
uv run python cognee/cognee/examples/python/simple_example.py
|
||||
```
|
||||
|
||||
## 4. 📤 Submitting Changes
|
||||
|
||||
1. Install ruff on your system
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
"""Enable delete for old tutorial notebooks
|
||||
|
||||
Revision ID: 1a58b986e6e1
|
||||
Revises: 46a6ce2bd2b2
|
||||
Create Date: 2025-12-17 11:04:44.414259
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "1a58b986e6e1"
|
||||
down_revision: Union[str, None] = "46a6ce2bd2b2"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def change_tutorial_deletable_flag(deletable: bool) -> None:
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
if "notebooks" not in inspector.get_table_names():
|
||||
return
|
||||
|
||||
columns = {col["name"] for col in inspector.get_columns("notebooks")}
|
||||
required_columns = {"name", "deletable"}
|
||||
if not required_columns.issubset(columns):
|
||||
return
|
||||
|
||||
notebooks = sa.table(
|
||||
"notebooks",
|
||||
sa.Column("name", sa.String()),
|
||||
sa.Column("deletable", sa.Boolean()),
|
||||
)
|
||||
|
||||
tutorial_name = "Python Development with Cognee Tutorial 🧠"
|
||||
|
||||
bind.execute(
|
||||
notebooks.update().where(notebooks.c.name == tutorial_name).values(deletable=deletable)
|
||||
)
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
change_tutorial_deletable_flag(True)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
change_tutorial_deletable_flag(False)
|
||||
38
alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py
Normal file
38
alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
"""Add label column to data table
|
||||
|
||||
Revision ID: a1b2c3d4e5f6
|
||||
Revises: 211ab850ef3d
|
||||
Create Date: 2025-11-17 17:54:32.123456
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "a1b2c3d4e5f6"
|
||||
down_revision: Union[str, None] = "46a6ce2bd2b2"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def _get_column(inspector, table, name, schema=None):
|
||||
for col in inspector.get_columns(table, schema=schema):
|
||||
if col["name"] == name:
|
||||
return col
|
||||
return None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
conn = op.get_bind()
|
||||
insp = sa.inspect(conn)
|
||||
|
||||
label_column = _get_column(insp, "data", "label")
|
||||
if not label_column:
|
||||
op.add_column("data", sa.Column("label", sa.String(), nullable=True))
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_column("data", "label")
|
||||
51
alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py
Normal file
51
alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
"""add_last_accessed_to_data
|
||||
|
||||
Revision ID: e1ec1dcb50b6
|
||||
Revises: 211ab850ef3d
|
||||
Create Date: 2025-11-04 21:45:52.642322
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "e1ec1dcb50b6"
|
||||
down_revision: Union[str, None] = "a1b2c3d4e5f6"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def _get_column(inspector, table, name, schema=None):
|
||||
for col in inspector.get_columns(table, schema=schema):
|
||||
if col["name"] == name:
|
||||
return col
|
||||
return None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
conn = op.get_bind()
|
||||
insp = sa.inspect(conn)
|
||||
|
||||
last_accessed_column = _get_column(insp, "data", "last_accessed")
|
||||
if not last_accessed_column:
|
||||
# Always create the column for schema consistency
|
||||
op.add_column("data", sa.Column("last_accessed", sa.DateTime(timezone=True), nullable=True))
|
||||
|
||||
# Only initialize existing records if feature is enabled
|
||||
enable_last_accessed = os.getenv("ENABLE_LAST_ACCESSED", "false").lower() == "true"
|
||||
if enable_last_accessed:
|
||||
op.execute("UPDATE data SET last_accessed = CURRENT_TIMESTAMP")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
conn = op.get_bind()
|
||||
insp = sa.inspect(conn)
|
||||
|
||||
last_accessed_column = _get_column(insp, "data", "last_accessed")
|
||||
if last_accessed_column:
|
||||
op.drop_column("data", "last_accessed")
|
||||
1286
cognee-frontend/package-lock.json
generated
1286
cognee-frontend/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -9,14 +9,15 @@
|
|||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@auth0/nextjs-auth0": "^4.13.1",
|
||||
"@auth0/nextjs-auth0": "^4.14.0",
|
||||
"classnames": "^2.5.1",
|
||||
"culori": "^4.0.1",
|
||||
"d3-force-3d": "^3.0.6",
|
||||
"next": "16.0.4",
|
||||
"react": "^19.2.0",
|
||||
"react-dom": "^19.2.0",
|
||||
"next": "^16.1.0",
|
||||
"react": "^19.2.3",
|
||||
"react-dom": "^19.2.3",
|
||||
"react-force-graph-2d": "^1.27.1",
|
||||
"react-markdown": "^10.1.0",
|
||||
"uuid": "^9.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
@ -33,4 +34,4 @@
|
|||
"tailwindcss": "^4.1.7",
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -15,6 +15,8 @@ import AddDataToCognee from "./AddDataToCognee";
|
|||
import NotebooksAccordion from "./NotebooksAccordion";
|
||||
import CogneeInstancesAccordion from "./CogneeInstancesAccordion";
|
||||
import InstanceDatasetsAccordion from "./InstanceDatasetsAccordion";
|
||||
import cloudFetch from "@/modules/instances/cloudFetch";
|
||||
import localFetch from "@/modules/instances/localFetch";
|
||||
|
||||
interface DashboardProps {
|
||||
user?: {
|
||||
|
|
@ -26,6 +28,17 @@ interface DashboardProps {
|
|||
accessToken: string;
|
||||
}
|
||||
|
||||
const cogneeInstances = {
|
||||
cloudCognee: {
|
||||
name: "CloudCognee",
|
||||
fetch: cloudFetch,
|
||||
},
|
||||
localCognee: {
|
||||
name: "LocalCognee",
|
||||
fetch: localFetch,
|
||||
}
|
||||
};
|
||||
|
||||
export default function Dashboard({ accessToken }: DashboardProps) {
|
||||
fetch.setAccessToken(accessToken);
|
||||
const { user } = useAuthenticatedUser();
|
||||
|
|
@ -38,7 +51,7 @@ export default function Dashboard({ accessToken }: DashboardProps) {
|
|||
updateNotebook,
|
||||
saveNotebook,
|
||||
removeNotebook,
|
||||
} = useNotebooks();
|
||||
} = useNotebooks(cogneeInstances.localCognee);
|
||||
|
||||
useEffect(() => {
|
||||
if (!notebooks.length) {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { useCallback, useEffect } from "react";
|
|||
|
||||
import { fetch, isCloudEnvironment, useBoolean } from "@/utils";
|
||||
import { checkCloudConnection } from "@/modules/cloud";
|
||||
import { setApiKey } from "@/modules/instances/cloudFetch";
|
||||
import { CaretIcon, CloseIcon, CloudIcon, LocalCogneeIcon } from "@/ui/Icons";
|
||||
import { CTAButton, GhostButton, IconButton, Input, Modal } from "@/ui/elements";
|
||||
|
||||
|
|
@ -24,6 +25,7 @@ export default function InstanceDatasetsAccordion({ onDatasetsChange }: Instance
|
|||
const checkConnectionToCloudCognee = useCallback((apiKey?: string) => {
|
||||
if (apiKey) {
|
||||
fetch.setApiKey(apiKey);
|
||||
setApiKey(apiKey);
|
||||
}
|
||||
return checkCloudConnection()
|
||||
.then(setCloudCogneeConnected)
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@ function useDatasets(useCloud = false) {
|
|||
})
|
||||
.catch((error) => {
|
||||
console.error('Error fetching datasets:', error);
|
||||
throw error;
|
||||
});
|
||||
}, [useCloud]);
|
||||
|
||||
|
|
|
|||
59
cognee-frontend/src/modules/instances/cloudFetch.ts
Normal file
59
cognee-frontend/src/modules/instances/cloudFetch.ts
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
import handleServerErrors from "@/utils/handleServerErrors";
|
||||
|
||||
// let numberOfRetries = 0;
|
||||
|
||||
const cloudApiUrl = process.env.NEXT_PUBLIC_CLOUD_API_URL || "http://localhost:8001";
|
||||
|
||||
let apiKey: string | null = process.env.NEXT_PUBLIC_COGWIT_API_KEY || null;
|
||||
|
||||
export function setApiKey(newApiKey: string) {
|
||||
apiKey = newApiKey;
|
||||
};
|
||||
|
||||
export default async function cloudFetch(url: URL | RequestInfo, options: RequestInit = {}): Promise<Response> {
|
||||
// function retry(lastError: Response) {
|
||||
// if (numberOfRetries >= 1) {
|
||||
// return Promise.reject(lastError);
|
||||
// }
|
||||
|
||||
// numberOfRetries += 1;
|
||||
|
||||
// return global.fetch("/auth/token")
|
||||
// .then(() => {
|
||||
// return fetch(url, options);
|
||||
// });
|
||||
// }
|
||||
|
||||
const authHeaders = {
|
||||
"Authorization": `X-Api-Key ${apiKey}`,
|
||||
};
|
||||
|
||||
return global.fetch(
|
||||
cloudApiUrl + "/api" + (typeof url === "string" ? url : url.toString()).replace("/v1", ""),
|
||||
{
|
||||
...options,
|
||||
headers: {
|
||||
...options.headers,
|
||||
...authHeaders,
|
||||
} as HeadersInit,
|
||||
credentials: "include",
|
||||
},
|
||||
)
|
||||
.then((response) => handleServerErrors(response, null, true))
|
||||
.catch((error) => {
|
||||
if (error.message === "NEXT_REDIRECT") {
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (error.detail === undefined) {
|
||||
return Promise.reject(
|
||||
new Error("No connection to the server.")
|
||||
);
|
||||
}
|
||||
|
||||
return Promise.reject(error);
|
||||
});
|
||||
// .finally(() => {
|
||||
// numberOfRetries = 0;
|
||||
// });
|
||||
}
|
||||
27
cognee-frontend/src/modules/instances/localFetch.ts
Normal file
27
cognee-frontend/src/modules/instances/localFetch.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
import handleServerErrors from "@/utils/handleServerErrors";
|
||||
|
||||
const localApiUrl = process.env.NEXT_PUBLIC_LOCAL_API_URL || "http://localhost:8000";
|
||||
|
||||
export default async function localFetch(url: URL | RequestInfo, options: RequestInit = {}): Promise<Response> {
|
||||
return global.fetch(
|
||||
localApiUrl + "/api" + (typeof url === "string" ? url : url.toString()),
|
||||
{
|
||||
...options,
|
||||
credentials: "include",
|
||||
},
|
||||
)
|
||||
.then((response) => handleServerErrors(response, null, false))
|
||||
.catch((error) => {
|
||||
if (error.message === "NEXT_REDIRECT") {
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (error.detail === undefined) {
|
||||
return Promise.reject(
|
||||
new Error("No connection to the server.")
|
||||
);
|
||||
}
|
||||
|
||||
return Promise.reject(error);
|
||||
});
|
||||
}
|
||||
4
cognee-frontend/src/modules/instances/types.ts
Normal file
4
cognee-frontend/src/modules/instances/types.ts
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
export interface CogneeInstance {
|
||||
name: string;
|
||||
fetch: typeof global.fetch;
|
||||
}
|
||||
13
cognee-frontend/src/modules/notebooks/createNotebook.ts
Normal file
13
cognee-frontend/src/modules/notebooks/createNotebook.ts
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function createNotebook(notebookName: string, instance: CogneeInstance) {
|
||||
return instance.fetch("/v1/notebooks/", {
|
||||
body: JSON.stringify({ name: notebookName }),
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}).then((response: Response) =>
|
||||
response.ok ? response.json() : Promise.reject(response)
|
||||
);
|
||||
}
|
||||
7
cognee-frontend/src/modules/notebooks/deleteNotebook.ts
Normal file
7
cognee-frontend/src/modules/notebooks/deleteNotebook.ts
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function deleteNotebook(notebookId: string, instance: CogneeInstance) {
|
||||
return instance.fetch(`/v1/notebooks/${notebookId}`, {
|
||||
method: "DELETE",
|
||||
});
|
||||
}
|
||||
12
cognee-frontend/src/modules/notebooks/getNotebooks.ts
Normal file
12
cognee-frontend/src/modules/notebooks/getNotebooks.ts
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function getNotebooks(instance: CogneeInstance) {
|
||||
return instance.fetch("/v1/notebooks/", {
|
||||
method: "GET",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}).then((response: Response) =>
|
||||
response.ok ? response.json() : Promise.reject(response)
|
||||
);
|
||||
}
|
||||
14
cognee-frontend/src/modules/notebooks/runNotebookCell.ts
Normal file
14
cognee-frontend/src/modules/notebooks/runNotebookCell.ts
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
import { Cell } from "@/ui/elements/Notebook/types";
|
||||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function runNotebookCell(notebookId: string, cell: Cell, instance: CogneeInstance) {
|
||||
return instance.fetch(`/v1/notebooks/${notebookId}/${cell.id}/run`, {
|
||||
body: JSON.stringify({
|
||||
content: cell.content,
|
||||
}),
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}).then((response: Response) => response.json());
|
||||
}
|
||||
13
cognee-frontend/src/modules/notebooks/saveNotebook.ts
Normal file
13
cognee-frontend/src/modules/notebooks/saveNotebook.ts
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
|
||||
export default function saveNotebook(notebookId: string, notebookData: object, instance: CogneeInstance) {
|
||||
return instance.fetch(`/v1/notebooks/${notebookId}`, {
|
||||
body: JSON.stringify(notebookData),
|
||||
method: "PUT",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}).then((response: Response) =>
|
||||
response.ok ? response.json() : Promise.reject(response)
|
||||
);
|
||||
}
|
||||
|
|
@ -1,20 +1,18 @@
|
|||
import { useCallback, useState } from "react";
|
||||
import { fetch, isCloudEnvironment } from "@/utils";
|
||||
import { Cell, Notebook } from "@/ui/elements/Notebook/types";
|
||||
import { CogneeInstance } from "@/modules/instances/types";
|
||||
import createNotebook from "./createNotebook";
|
||||
import deleteNotebook from "./deleteNotebook";
|
||||
import getNotebooks from "./getNotebooks";
|
||||
import runNotebookCell from "./runNotebookCell";
|
||||
import { default as persistNotebook } from "./saveNotebook";
|
||||
|
||||
function useNotebooks() {
|
||||
function useNotebooks(instance: CogneeInstance) {
|
||||
const [notebooks, setNotebooks] = useState<Notebook[]>([]);
|
||||
|
||||
const addNotebook = useCallback((notebookName: string) => {
|
||||
return fetch("/v1/notebooks", {
|
||||
body: JSON.stringify({ name: notebookName }),
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}, isCloudEnvironment())
|
||||
.then((response) => response.json())
|
||||
.then((notebook) => {
|
||||
return createNotebook(notebookName, instance)
|
||||
.then((notebook: Notebook) => {
|
||||
setNotebooks((notebooks) => [
|
||||
...notebooks,
|
||||
notebook,
|
||||
|
|
@ -22,36 +20,29 @@ function useNotebooks() {
|
|||
|
||||
return notebook;
|
||||
});
|
||||
}, []);
|
||||
}, [instance]);
|
||||
|
||||
const removeNotebook = useCallback((notebookId: string) => {
|
||||
return fetch(`/v1/notebooks/${notebookId}`, {
|
||||
method: "DELETE",
|
||||
}, isCloudEnvironment())
|
||||
return deleteNotebook(notebookId, instance)
|
||||
.then(() => {
|
||||
setNotebooks((notebooks) =>
|
||||
notebooks.filter((notebook) => notebook.id !== notebookId)
|
||||
);
|
||||
});
|
||||
}, []);
|
||||
}, [instance]);
|
||||
|
||||
const fetchNotebooks = useCallback(() => {
|
||||
return fetch("/v1/notebooks", {
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}, isCloudEnvironment())
|
||||
.then((response) => response.json())
|
||||
return getNotebooks(instance)
|
||||
.then((notebooks) => {
|
||||
setNotebooks(notebooks);
|
||||
|
||||
return notebooks;
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error("Error fetching notebooks:", error);
|
||||
console.error("Error fetching notebooks:", error.detail);
|
||||
throw error
|
||||
});
|
||||
}, []);
|
||||
}, [instance]);
|
||||
|
||||
const updateNotebook = useCallback((updatedNotebook: Notebook) => {
|
||||
setNotebooks((existingNotebooks) =>
|
||||
|
|
@ -64,20 +55,13 @@ function useNotebooks() {
|
|||
}, []);
|
||||
|
||||
const saveNotebook = useCallback((notebook: Notebook) => {
|
||||
return fetch(`/v1/notebooks/${notebook.id}`, {
|
||||
body: JSON.stringify({
|
||||
name: notebook.name,
|
||||
cells: notebook.cells,
|
||||
}),
|
||||
method: "PUT",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}, isCloudEnvironment())
|
||||
.then((response) => response.json())
|
||||
}, []);
|
||||
return persistNotebook(notebook.id, {
|
||||
name: notebook.name,
|
||||
cells: notebook.cells,
|
||||
}, instance);
|
||||
}, [instance]);
|
||||
|
||||
const runCell = useCallback((notebook: Notebook, cell: Cell, cogneeInstance: string) => {
|
||||
const runCell = useCallback((notebook: Notebook, cell: Cell) => {
|
||||
setNotebooks((existingNotebooks) =>
|
||||
existingNotebooks.map((existingNotebook) =>
|
||||
existingNotebook.id === notebook.id ? {
|
||||
|
|
@ -89,20 +73,11 @@ function useNotebooks() {
|
|||
error: undefined,
|
||||
} : existingCell
|
||||
),
|
||||
} : notebook
|
||||
} : existingNotebook
|
||||
)
|
||||
);
|
||||
|
||||
return fetch(`/v1/notebooks/${notebook.id}/${cell.id}/run`, {
|
||||
body: JSON.stringify({
|
||||
content: cell.content,
|
||||
}),
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
}, cogneeInstance === "cloud")
|
||||
.then((response) => response.json())
|
||||
return runNotebookCell(notebook.id, cell, instance)
|
||||
.then((response) => {
|
||||
setNotebooks((existingNotebooks) =>
|
||||
existingNotebooks.map((existingNotebook) =>
|
||||
|
|
@ -115,11 +90,11 @@ function useNotebooks() {
|
|||
error: response.error,
|
||||
} : existingCell
|
||||
),
|
||||
} : notebook
|
||||
} : existingNotebook
|
||||
)
|
||||
);
|
||||
});
|
||||
}, []);
|
||||
}, [instance]);
|
||||
|
||||
return {
|
||||
notebooks,
|
||||
|
|
|
|||
77
cognee-frontend/src/ui/elements/Notebook/MarkdownPreview.tsx
Normal file
77
cognee-frontend/src/ui/elements/Notebook/MarkdownPreview.tsx
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import { memo } from "react";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
|
||||
interface MarkdownPreviewProps {
|
||||
content: string;
|
||||
className?: string;
|
||||
}
|
||||
|
||||
function MarkdownPreview({ content, className = "" }: MarkdownPreviewProps) {
|
||||
return (
|
||||
<div className={`min-h-24 max-h-96 overflow-y-auto p-4 prose prose-sm max-w-none ${className}`}>
|
||||
<ReactMarkdown
|
||||
components={{
|
||||
h1: ({ children }) => <h1 className="text-2xl font-bold mt-4 mb-2">{children}</h1>,
|
||||
h2: ({ children }) => <h2 className="text-xl font-bold mt-3 mb-2">{children}</h2>,
|
||||
h3: ({ children }) => <h3 className="text-lg font-bold mt-3 mb-2">{children}</h3>,
|
||||
h4: ({ children }) => <h4 className="text-base font-bold mt-2 mb-1">{children}</h4>,
|
||||
h5: ({ children }) => <h5 className="text-sm font-bold mt-2 mb-1">{children}</h5>,
|
||||
h6: ({ children }) => <h6 className="text-xs font-bold mt-2 mb-1">{children}</h6>,
|
||||
p: ({ children }) => <p className="mb-2">{children}</p>,
|
||||
ul: ({ children }) => <ul className="list-disc list-inside mb-2 ml-4">{children}</ul>,
|
||||
ol: ({ children }) => <ol className="list-decimal list-inside mb-2 ml-4">{children}</ol>,
|
||||
li: ({ children }) => <li className="mb-1">{children}</li>,
|
||||
blockquote: ({ children }) => (
|
||||
<blockquote className="border-l-4 border-gray-300 pl-4 italic my-2">{children}</blockquote>
|
||||
),
|
||||
code: ({ className, children, ...props }) => {
|
||||
const isInline = !className;
|
||||
return isInline ? (
|
||||
<code className="bg-gray-100 px-1 py-0.5 rounded text-sm font-mono" {...props}>
|
||||
{children}
|
||||
</code>
|
||||
) : (
|
||||
<code className="block bg-gray-100 p-2 rounded text-sm font-mono overflow-x-auto" {...props}>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
},
|
||||
pre: ({ children }) => (
|
||||
<pre className="bg-gray-100 p-2 rounded text-sm font-mono overflow-x-auto mb-2">
|
||||
{children}
|
||||
</pre>
|
||||
),
|
||||
a: ({ href, children }) => (
|
||||
<a href={href} className="text-blue-600 hover:underline" target="_blank" rel="noopener noreferrer">
|
||||
{children}
|
||||
</a>
|
||||
),
|
||||
strong: ({ children }) => <strong className="font-bold">{children}</strong>,
|
||||
em: ({ children }) => <em className="italic">{children}</em>,
|
||||
hr: () => <hr className="my-4 border-gray-300" />,
|
||||
table: ({ children }) => (
|
||||
<div className="overflow-x-auto my-2">
|
||||
<table className="min-w-full border border-gray-300">{children}</table>
|
||||
</div>
|
||||
),
|
||||
thead: ({ children }) => <thead className="bg-gray-100">{children}</thead>,
|
||||
tbody: ({ children }) => <tbody>{children}</tbody>,
|
||||
tr: ({ children }) => <tr className="border-b border-gray-300">{children}</tr>,
|
||||
th: ({ children }) => (
|
||||
<th className="border border-gray-300 px-4 py-2 text-left font-bold">
|
||||
{children}
|
||||
</th>
|
||||
),
|
||||
td: ({ children }) => (
|
||||
<td className="border border-gray-300 px-4 py-2">{children}</td>
|
||||
),
|
||||
}}
|
||||
>
|
||||
{content}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default memo(MarkdownPreview);
|
||||
|
||||
|
|
@ -2,15 +2,17 @@
|
|||
|
||||
import { v4 as uuid4 } from "uuid";
|
||||
import classNames from "classnames";
|
||||
import { Fragment, MouseEvent, RefObject, useCallback, useEffect, useRef, useState } from "react";
|
||||
import { Fragment, MouseEvent, MutableRefObject, useCallback, useEffect, useRef, useState, memo } from "react";
|
||||
|
||||
import { useModal } from "@/ui/elements/Modal";
|
||||
import { CaretIcon, CloseIcon, PlusIcon } from "@/ui/Icons";
|
||||
import { IconButton, PopupMenu, TextArea, Modal, GhostButton, CTAButton } from "@/ui/elements";
|
||||
import PopupMenu from "@/ui/elements/PopupMenu";
|
||||
import { IconButton, TextArea, Modal, GhostButton, CTAButton } from "@/ui/elements";
|
||||
import { GraphControlsAPI } from "@/app/(graph)/GraphControls";
|
||||
import GraphVisualization, { GraphVisualizationAPI } from "@/app/(graph)/GraphVisualization";
|
||||
|
||||
import NotebookCellHeader from "./NotebookCellHeader";
|
||||
import MarkdownPreview from "./MarkdownPreview";
|
||||
import { Cell, Notebook as NotebookType } from "./types";
|
||||
|
||||
interface NotebookProps {
|
||||
|
|
@ -19,7 +21,186 @@ interface NotebookProps {
|
|||
updateNotebook: (updatedNotebook: NotebookType) => void;
|
||||
}
|
||||
|
||||
interface NotebookCellProps {
|
||||
cell: Cell;
|
||||
index: number;
|
||||
isOpen: boolean;
|
||||
isMarkdownEditMode: boolean;
|
||||
onToggleOpen: () => void;
|
||||
onToggleMarkdownEdit: () => void;
|
||||
onContentChange: (value: string) => void;
|
||||
onCellRun: (cell: Cell, cogneeInstance: string) => Promise<void>;
|
||||
onCellRename: (cell: Cell) => void;
|
||||
onCellRemove: (cell: Cell) => void;
|
||||
onCellUp: (cell: Cell) => void;
|
||||
onCellDown: (cell: Cell) => void;
|
||||
onCellAdd: (afterCellIndex: number, cellType: "markdown" | "code") => void;
|
||||
}
|
||||
|
||||
const NotebookCell = memo(function NotebookCell({
|
||||
cell,
|
||||
index,
|
||||
isOpen,
|
||||
isMarkdownEditMode,
|
||||
onToggleOpen,
|
||||
onToggleMarkdownEdit,
|
||||
onContentChange,
|
||||
onCellRun,
|
||||
onCellRename,
|
||||
onCellRemove,
|
||||
onCellUp,
|
||||
onCellDown,
|
||||
onCellAdd,
|
||||
}: NotebookCellProps) {
|
||||
return (
|
||||
<Fragment>
|
||||
<div className="flex flex-row rounded-xl border-1 border-gray-100">
|
||||
<div className="flex flex-col flex-1 relative">
|
||||
{cell.type === "code" ? (
|
||||
<>
|
||||
<div className="absolute left-[-1.35rem] top-2.5">
|
||||
<IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={onToggleOpen}>
|
||||
<CaretIcon className={classNames("transition-transform", isOpen ? "rotate-0" : "rotate-180")} />
|
||||
</IconButton>
|
||||
</div>
|
||||
|
||||
<NotebookCellHeader
|
||||
cell={cell}
|
||||
runCell={onCellRun}
|
||||
renameCell={onCellRename}
|
||||
removeCell={onCellRemove}
|
||||
moveCellUp={onCellUp}
|
||||
moveCellDown={onCellDown}
|
||||
className="rounded-tl-xl rounded-tr-xl"
|
||||
/>
|
||||
|
||||
{isOpen && (
|
||||
<>
|
||||
<TextArea
|
||||
value={cell.content}
|
||||
onChange={onContentChange}
|
||||
isAutoExpanding
|
||||
name="cellInput"
|
||||
placeholder="Type your code here..."
|
||||
className="resize-none min-h-36 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0"
|
||||
/>
|
||||
|
||||
<div className="flex flex-col bg-gray-100 overflow-x-auto max-w-full">
|
||||
{cell.result && (
|
||||
<div className="px-2 py-2">
|
||||
output: <CellResult content={cell.result} />
|
||||
</div>
|
||||
)}
|
||||
{!!cell.error?.length && (
|
||||
<div className="px-2 py-2">
|
||||
error: {cell.error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<div className="absolute left-[-1.35rem] top-2.5">
|
||||
<IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={onToggleOpen}>
|
||||
<CaretIcon className={classNames("transition-transform", isOpen ? "rotate-0" : "rotate-180")} />
|
||||
</IconButton>
|
||||
</div>
|
||||
|
||||
<NotebookCellHeader
|
||||
cell={cell}
|
||||
renameCell={onCellRename}
|
||||
removeCell={onCellRemove}
|
||||
moveCellUp={onCellUp}
|
||||
moveCellDown={onCellDown}
|
||||
className="rounded-tl-xl rounded-tr-xl"
|
||||
/>
|
||||
|
||||
{isOpen && (
|
||||
<div className="relative rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 overflow-hidden">
|
||||
<GhostButton
|
||||
onClick={onToggleMarkdownEdit}
|
||||
className="absolute top-2 right-2.5 text-xs leading-[1] !px-2 !py-1 !h-auto"
|
||||
>
|
||||
{isMarkdownEditMode ? "Preview" : "Edit"}
|
||||
</GhostButton>
|
||||
{isMarkdownEditMode ? (
|
||||
<TextArea
|
||||
value={cell.content}
|
||||
onChange={onContentChange}
|
||||
isAutoExpanding
|
||||
name="markdownInput"
|
||||
placeholder="Type your markdown here..."
|
||||
className="resize-none min-h-24 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0 !bg-gray-50"
|
||||
/>
|
||||
) : (
|
||||
<MarkdownPreview content={cell.content} className="!bg-gray-50" />
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="ml-[-1.35rem]">
|
||||
<PopupMenu
|
||||
openToRight={true}
|
||||
triggerElement={<PlusIcon />}
|
||||
triggerClassName="p-[0.25rem] m-[-0.25rem]"
|
||||
>
|
||||
<div className="flex flex-col gap-0.5">
|
||||
<button
|
||||
onClick={() => onCellAdd(index, "markdown")}
|
||||
className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
|
||||
>
|
||||
<span>text</span>
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
onClick={() => onCellAdd(index, "code")}
|
||||
className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
|
||||
>
|
||||
<span>code</span>
|
||||
</div>
|
||||
</PopupMenu>
|
||||
</div>
|
||||
</Fragment>
|
||||
);
|
||||
});
|
||||
|
||||
export default function Notebook({ notebook, updateNotebook, runCell }: NotebookProps) {
|
||||
const [openCells, setOpenCells] = useState(new Set(notebook.cells.map((c: Cell) => c.id)));
|
||||
const [markdownEditMode, setMarkdownEditMode] = useState<Set<string>>(new Set());
|
||||
|
||||
const toggleCellOpen = useCallback((id: string) => {
|
||||
setOpenCells((prev) => {
|
||||
const newState = new Set(prev);
|
||||
|
||||
if (newState.has(id)) {
|
||||
newState.delete(id)
|
||||
} else {
|
||||
newState.add(id);
|
||||
}
|
||||
|
||||
return newState;
|
||||
});
|
||||
}, []);
|
||||
|
||||
const toggleMarkdownEditMode = useCallback((id: string) => {
|
||||
setMarkdownEditMode((prev) => {
|
||||
const newState = new Set(prev);
|
||||
|
||||
if (newState.has(id)) {
|
||||
newState.delete(id);
|
||||
} else {
|
||||
newState.add(id);
|
||||
}
|
||||
|
||||
return newState;
|
||||
});
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (notebook.cells.length === 0) {
|
||||
const newCell: Cell = {
|
||||
|
|
@ -34,7 +215,7 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
});
|
||||
toggleCellOpen(newCell.id)
|
||||
}
|
||||
}, [notebook, updateNotebook]);
|
||||
}, [notebook, updateNotebook, toggleCellOpen]);
|
||||
|
||||
const handleCellRun = useCallback((cell: Cell, cogneeInstance: string) => {
|
||||
return runCell(notebook, cell, cogneeInstance);
|
||||
|
|
@ -43,7 +224,7 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
const handleCellAdd = useCallback((afterCellIndex: number, cellType: "markdown" | "code") => {
|
||||
const newCell: Cell = {
|
||||
id: uuid4(),
|
||||
name: "new cell",
|
||||
name: cellType === "markdown" ? "Markdown Cell" : "Code Cell",
|
||||
type: cellType,
|
||||
content: "",
|
||||
};
|
||||
|
|
@ -59,7 +240,7 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
|
||||
toggleCellOpen(newCell.id);
|
||||
updateNotebook(newNotebook);
|
||||
}, [notebook, updateNotebook]);
|
||||
}, [notebook, updateNotebook, toggleCellOpen]);
|
||||
|
||||
const removeCell = useCallback((cell: Cell, event?: MouseEvent) => {
|
||||
event?.preventDefault();
|
||||
|
|
@ -81,14 +262,12 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
openCellRemoveConfirmModal(cell);
|
||||
}, [openCellRemoveConfirmModal]);
|
||||
|
||||
const handleCellInputChange = useCallback((notebook: NotebookType, cell: Cell, value: string) => {
|
||||
const newCell = {...cell, content: value };
|
||||
|
||||
const handleCellInputChange = useCallback((cellId: string, value: string) => {
|
||||
updateNotebook({
|
||||
...notebook,
|
||||
cells: notebook.cells.map((cell: Cell) => (cell.id === newCell.id ? newCell : cell)),
|
||||
cells: notebook.cells.map((cell: Cell) => (cell.id === cellId ? {...cell, content: value} : cell)),
|
||||
});
|
||||
}, [updateNotebook]);
|
||||
}, [notebook, updateNotebook]);
|
||||
|
||||
const handleCellUp = useCallback((cell: Cell) => {
|
||||
const index = notebook.cells.indexOf(cell);
|
||||
|
|
@ -131,133 +310,28 @@ export default function Notebook({ notebook, updateNotebook, runCell }: Notebook
|
|||
}
|
||||
}, [notebook, updateNotebook]);
|
||||
|
||||
const [openCells, setOpenCells] = useState(new Set(notebook.cells.map((c: Cell) => c.id)));
|
||||
|
||||
const toggleCellOpen = (id: string) => {
|
||||
setOpenCells((prev) => {
|
||||
const newState = new Set(prev);
|
||||
|
||||
if (newState.has(id)) {
|
||||
newState.delete(id)
|
||||
} else {
|
||||
newState.add(id);
|
||||
}
|
||||
|
||||
return newState;
|
||||
});
|
||||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className="bg-white rounded-xl flex flex-col gap-0.5 px-7 py-5 flex-1">
|
||||
<div className="mb-5">{notebook.name}</div>
|
||||
|
||||
{notebook.cells.map((cell: Cell, index) => (
|
||||
<Fragment key={cell.id}>
|
||||
<div key={cell.id} className="flex flex-row rounded-xl border-1 border-gray-100">
|
||||
<div className="flex flex-col flex-1 relative">
|
||||
{cell.type === "code" ? (
|
||||
<>
|
||||
<div className="absolute left-[-1.35rem] top-2.5">
|
||||
<IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={toggleCellOpen.bind(null, cell.id)}>
|
||||
<CaretIcon className={classNames("transition-transform", openCells.has(cell.id) ? "rotate-0" : "rotate-180")} />
|
||||
</IconButton>
|
||||
</div>
|
||||
|
||||
<NotebookCellHeader
|
||||
cell={cell}
|
||||
runCell={handleCellRun}
|
||||
renameCell={handleCellRename}
|
||||
removeCell={handleCellRemove}
|
||||
moveCellUp={handleCellUp}
|
||||
moveCellDown={handleCellDown}
|
||||
className="rounded-tl-xl rounded-tr-xl"
|
||||
/>
|
||||
|
||||
{openCells.has(cell.id) && (
|
||||
<>
|
||||
<TextArea
|
||||
value={cell.content}
|
||||
onChange={handleCellInputChange.bind(null, notebook, cell)}
|
||||
// onKeyUp={handleCellRunOnEnter}
|
||||
isAutoExpanding
|
||||
name="cellInput"
|
||||
placeholder="Type your code here..."
|
||||
contentEditable={true}
|
||||
className="resize-none min-h-36 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0"
|
||||
/>
|
||||
|
||||
<div className="flex flex-col bg-gray-100 overflow-x-auto max-w-full">
|
||||
{cell.result && (
|
||||
<div className="px-2 py-2">
|
||||
output: <CellResult content={cell.result} />
|
||||
</div>
|
||||
)}
|
||||
{!!cell.error?.length && (
|
||||
<div className="px-2 py-2">
|
||||
error: {cell.error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<div className="absolute left-[-1.35rem] top-2.5">
|
||||
<IconButton className="p-[0.25rem] m-[-0.25rem]" onClick={toggleCellOpen.bind(null, cell.id)}>
|
||||
<CaretIcon className={classNames("transition-transform", openCells.has(cell.id) ? "rotate-0" : "rotate-180")} />
|
||||
</IconButton>
|
||||
</div>
|
||||
|
||||
<NotebookCellHeader
|
||||
cell={cell}
|
||||
renameCell={handleCellRename}
|
||||
removeCell={handleCellRemove}
|
||||
moveCellUp={handleCellUp}
|
||||
moveCellDown={handleCellDown}
|
||||
className="rounded-tl-xl rounded-tr-xl"
|
||||
/>
|
||||
|
||||
{openCells.has(cell.id) && (
|
||||
<TextArea
|
||||
value={cell.content}
|
||||
onChange={handleCellInputChange.bind(null, notebook, cell)}
|
||||
// onKeyUp={handleCellRunOnEnter}
|
||||
isAutoExpanding
|
||||
name="cellInput"
|
||||
placeholder="Type your text here..."
|
||||
contentEditable={true}
|
||||
className="resize-none min-h-24 max-h-96 overflow-y-auto rounded-tl-none rounded-tr-none rounded-bl-xl rounded-br-xl border-0 !outline-0"
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="ml-[-1.35rem]">
|
||||
<PopupMenu
|
||||
openToRight={true}
|
||||
triggerElement={<PlusIcon />}
|
||||
triggerClassName="p-[0.25rem] m-[-0.25rem]"
|
||||
>
|
||||
<div className="flex flex-col gap-0.5">
|
||||
<button
|
||||
onClick={() => handleCellAdd(index, "markdown")}
|
||||
className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
|
||||
>
|
||||
<span>text</span>
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
onClick={() => handleCellAdd(index, "code")}
|
||||
className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer"
|
||||
>
|
||||
<span>code</span>
|
||||
</div>
|
||||
</PopupMenu>
|
||||
</div>
|
||||
</Fragment>
|
||||
<NotebookCell
|
||||
key={cell.id}
|
||||
cell={cell}
|
||||
index={index}
|
||||
isOpen={openCells.has(cell.id)}
|
||||
isMarkdownEditMode={markdownEditMode.has(cell.id)}
|
||||
onToggleOpen={() => toggleCellOpen(cell.id)}
|
||||
onToggleMarkdownEdit={() => toggleMarkdownEditMode(cell.id)}
|
||||
onContentChange={(value) => handleCellInputChange(cell.id, value)}
|
||||
onCellRun={handleCellRun}
|
||||
onCellRename={handleCellRename}
|
||||
onCellRemove={handleCellRemove}
|
||||
onCellUp={handleCellUp}
|
||||
onCellDown={handleCellDown}
|
||||
onCellAdd={handleCellAdd}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
|
||||
|
|
@ -288,6 +362,10 @@ function CellResult({ content }: { content: [] }) {
|
|||
getSelectedNode: () => null,
|
||||
});
|
||||
|
||||
if (content.length === 0) {
|
||||
return <span>OK</span>;
|
||||
}
|
||||
|
||||
for (const line of content) {
|
||||
try {
|
||||
if (Array.isArray(line)) {
|
||||
|
|
@ -298,7 +376,7 @@ function CellResult({ content }: { content: [] }) {
|
|||
<span className="text-sm pl-2 mb-4">reasoning graph</span>
|
||||
<GraphVisualization
|
||||
data={transformInsightsGraphData(line)}
|
||||
ref={graphRef as RefObject<GraphVisualizationAPI>}
|
||||
ref={graphRef as MutableRefObject<GraphVisualizationAPI>}
|
||||
graphControls={graphControls}
|
||||
className="min-h-80"
|
||||
/>
|
||||
|
|
@ -346,7 +424,7 @@ function CellResult({ content }: { content: [] }) {
|
|||
<span className="text-sm pl-2 mb-4">reasoning graph (datasets: {datasetName})</span>
|
||||
<GraphVisualization
|
||||
data={transformToVisualizationData(graph)}
|
||||
ref={graphRef as RefObject<GraphVisualizationAPI>}
|
||||
ref={graphRef as MutableRefObject<GraphVisualizationAPI>}
|
||||
graphControls={graphControls}
|
||||
className="min-h-80"
|
||||
/>
|
||||
|
|
@ -356,8 +434,7 @@ function CellResult({ content }: { content: [] }) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "string") {
|
||||
else if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "string") {
|
||||
const datasets = Array.from(
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
new Set(Object.values(line["datasets"]).map((dataset: any) => dataset.name))
|
||||
|
|
@ -369,39 +446,46 @@ function CellResult({ content }: { content: [] }) {
|
|||
<span className="block px-2 py-2 whitespace-normal">{line["result"]}</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
if (typeof(line) === "object" && line["graphs"]) {
|
||||
Object.entries<{ nodes: []; edges: []; }>(line["graphs"]).forEach(([datasetName, graph]) => {
|
||||
parsedContent.push(
|
||||
<div key={datasetName} className="w-full h-full bg-white">
|
||||
<span className="text-sm pl-2 mb-4">reasoning graph (datasets: {datasetName})</span>
|
||||
<GraphVisualization
|
||||
data={transformToVisualizationData(graph)}
|
||||
ref={graphRef as RefObject<GraphVisualizationAPI>}
|
||||
graphControls={graphControls}
|
||||
className="min-h-80"
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "object") {
|
||||
if (line["graphs"]) {
|
||||
Object.entries<{ nodes: []; edges: []; }>(line["graphs"]).forEach(([datasetName, graph]) => {
|
||||
parsedContent.push(
|
||||
<div key={datasetName} className="w-full h-full bg-white">
|
||||
<span className="text-sm pl-2 mb-4">reasoning graph (datasets: {datasetName})</span>
|
||||
<GraphVisualization
|
||||
data={transformToVisualizationData(graph)}
|
||||
ref={graphRef as MutableRefObject<GraphVisualizationAPI>}
|
||||
graphControls={graphControls}
|
||||
className="min-h-80"
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
else if (typeof(line) === "object" && line["result"] && typeof(line["result"]) === "object") {
|
||||
parsedContent.push(
|
||||
<pre className="px-2 w-full h-full bg-white text-sm" key={String(line).slice(0, -10)}>
|
||||
{JSON.stringify(line["result"], null, 2)}
|
||||
</pre>
|
||||
)
|
||||
}
|
||||
if (typeof(line) === "string") {
|
||||
else if (typeof(line) === "object") {
|
||||
parsedContent.push(
|
||||
<pre className="px-2 w-full h-full bg-white text-sm" key={String(line).slice(0, -10)}>
|
||||
{JSON.stringify(line, null, 2)}
|
||||
</pre>
|
||||
)
|
||||
}
|
||||
else if (typeof(line) === "string") {
|
||||
parsedContent.push(
|
||||
<pre className="px-2 w-full h-full bg-white text-sm whitespace-normal" key={String(line).slice(0, -10)}>
|
||||
{line}
|
||||
</pre>
|
||||
)
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
} catch {
|
||||
// It is fine if we don't manage to parse the output line, we show it as it is.
|
||||
parsedContent.push(
|
||||
<pre className="px-2 w-full h-full bg-white text-sm whitespace-normal" key={String(line).slice(0, -10)}>
|
||||
{line}
|
||||
|
|
@ -415,7 +499,6 @@ function CellResult({ content }: { content: [] }) {
|
|||
{item}
|
||||
</div>
|
||||
));
|
||||
|
||||
};
|
||||
|
||||
function transformToVisualizationData(graph: { nodes: [], edges: [] }) {
|
||||
|
|
@ -471,7 +554,7 @@ function transformInsightsGraphData(triplets: Triplet[]) {
|
|||
label: triplet[1]["relationship_name"],
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
nodes: Object.values(nodes),
|
||||
links: Object.values(links),
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import classNames from "classnames";
|
||||
|
||||
import { isCloudEnvironment, useBoolean } from "@/utils";
|
||||
import { PlayIcon } from "@/ui/Icons";
|
||||
import { PopupMenu, IconButton } from "@/ui/elements";
|
||||
import PopupMenu from "@/ui/elements/PopupMenu";
|
||||
import { IconButton } from "@/ui/elements";
|
||||
import { LoadingIndicator } from "@/ui/App";
|
||||
|
||||
import { Cell } from "./types";
|
||||
|
|
@ -39,7 +42,7 @@ export default function NotebookCellHeader({
|
|||
if (runCell) {
|
||||
setIsRunningCell();
|
||||
runCell(cell, runInstance)
|
||||
.then(() => {
|
||||
.finally(() => {
|
||||
setIsNotRunningCell();
|
||||
});
|
||||
}
|
||||
|
|
@ -53,7 +56,7 @@ export default function NotebookCellHeader({
|
|||
{isRunningCell ? <LoadingIndicator /> : <IconButton onClick={handleCellRun}><PlayIcon /></IconButton>}
|
||||
</>
|
||||
)}
|
||||
<span className="ml-4">{cell.name}</span>
|
||||
<span className="ml-4">{cell.type === "markdown" ? "Markdown Cell" : cell.name}</span>
|
||||
</div>
|
||||
<div className="pr-4 flex flex-row items-center gap-8">
|
||||
{runCell && (
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
"use client";
|
||||
|
||||
import classNames from "classnames";
|
||||
import { InputHTMLAttributes, useCallback, useEffect, useLayoutEffect, useRef } from "react"
|
||||
import { InputHTMLAttributes, useCallback, useEffect, useRef } from "react"
|
||||
|
||||
interface TextAreaProps extends Omit<InputHTMLAttributes<HTMLTextAreaElement>, "onChange"> {
|
||||
isAutoExpanding?: boolean; // Set to true to enable auto-expanding text area behavior. Default is false.
|
||||
value: string;
|
||||
onChange: (value: string) => void;
|
||||
value?: string;
|
||||
onChange?: (value: string) => void;
|
||||
}
|
||||
|
||||
export default function TextArea({
|
||||
|
|
@ -19,95 +19,81 @@ export default function TextArea({
|
|||
placeholder = "",
|
||||
onKeyUp,
|
||||
...props
|
||||
}: TextAreaProps) {
|
||||
const handleTextChange = useCallback((event: Event) => {
|
||||
const fakeTextAreaElement = event.target as HTMLDivElement;
|
||||
const newValue = fakeTextAreaElement.innerText;
|
||||
}: TextAreaProps) {
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||
const maxHeightRef = useRef<number | null>(null);
|
||||
const throttleTimeoutRef = useRef<number | null>(null);
|
||||
const lastAdjustTimeRef = useRef<number>(0);
|
||||
const THROTTLE_MS = 250; // 4 calculations per second
|
||||
|
||||
const adjustHeight = useCallback(() => {
|
||||
if (!isAutoExpanding || !textareaRef.current) return;
|
||||
|
||||
const textarea = textareaRef.current;
|
||||
|
||||
// Cache maxHeight on first calculation
|
||||
if (maxHeightRef.current === null) {
|
||||
const computedStyle = getComputedStyle(textarea);
|
||||
maxHeightRef.current = computedStyle.maxHeight === "none"
|
||||
? Infinity
|
||||
: parseInt(computedStyle.maxHeight) || Infinity;
|
||||
}
|
||||
|
||||
// Reset height to auto to get the correct scrollHeight
|
||||
textarea.style.height = "auto";
|
||||
// Set height to scrollHeight, but respect max-height
|
||||
const scrollHeight = textarea.scrollHeight;
|
||||
textarea.style.height = `${Math.min(scrollHeight, maxHeightRef.current)}px`;
|
||||
lastAdjustTimeRef.current = Date.now();
|
||||
}, [isAutoExpanding]);
|
||||
|
||||
const handleChange = useCallback((event: React.ChangeEvent<HTMLTextAreaElement>) => {
|
||||
const newValue = event.target.value;
|
||||
onChange?.(newValue);
|
||||
}, [onChange]);
|
||||
|
||||
const handleKeyUp = useCallback((event: Event) => {
|
||||
if (onKeyUp) {
|
||||
onKeyUp(event as unknown as React.KeyboardEvent<HTMLTextAreaElement>);
|
||||
}
|
||||
}, [onKeyUp]);
|
||||
// Throttle height adjustments to avoid blocking typing
|
||||
if (isAutoExpanding) {
|
||||
const now = Date.now();
|
||||
const timeSinceLastAdjust = now - lastAdjustTimeRef.current;
|
||||
|
||||
const handleTextAreaFocus = (event: React.FocusEvent<HTMLDivElement>) => {
|
||||
if (event.target.innerText.trim() === placeholder) {
|
||||
event.target.innerText = "";
|
||||
}
|
||||
};
|
||||
const handleTextAreaBlur = (event: React.FocusEvent<HTMLDivElement>) => {
|
||||
if (value === "") {
|
||||
event.target.innerText = placeholder;
|
||||
}
|
||||
};
|
||||
|
||||
const handleChange = (event: React.ChangeEvent<HTMLTextAreaElement>) => {
|
||||
onChange(event.target.value);
|
||||
};
|
||||
|
||||
const fakeTextAreaRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
const fakeTextAreaElement = fakeTextAreaRef.current;
|
||||
|
||||
if (fakeTextAreaElement && fakeTextAreaElement.innerText.trim() !== "") {
|
||||
fakeTextAreaElement.innerText = placeholder;
|
||||
}
|
||||
}, [placeholder]);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
const fakeTextAreaElement = fakeTextAreaRef.current;
|
||||
|
||||
if (fakeTextAreaElement) {
|
||||
fakeTextAreaElement.addEventListener("input", handleTextChange);
|
||||
fakeTextAreaElement.addEventListener("keyup", handleKeyUp);
|
||||
}
|
||||
|
||||
return () => {
|
||||
if (fakeTextAreaElement) {
|
||||
fakeTextAreaElement.removeEventListener("input", handleTextChange);
|
||||
fakeTextAreaElement.removeEventListener("keyup", handleKeyUp);
|
||||
if (timeSinceLastAdjust >= THROTTLE_MS) {
|
||||
adjustHeight();
|
||||
} else {
|
||||
if (throttleTimeoutRef.current !== null) {
|
||||
clearTimeout(throttleTimeoutRef.current);
|
||||
}
|
||||
throttleTimeoutRef.current = window.setTimeout(() => {
|
||||
adjustHeight();
|
||||
throttleTimeoutRef.current = null;
|
||||
}, THROTTLE_MS - timeSinceLastAdjust);
|
||||
}
|
||||
};
|
||||
}, [handleKeyUp, handleTextChange]);
|
||||
}
|
||||
}, [onChange, isAutoExpanding, adjustHeight]);
|
||||
|
||||
useEffect(() => {
|
||||
const fakeTextAreaElement = fakeTextAreaRef.current;
|
||||
const textAreaText = fakeTextAreaElement?.innerText;
|
||||
|
||||
if (fakeTextAreaElement && (value === "" || value === "\n")) {
|
||||
fakeTextAreaElement.innerText = placeholder;
|
||||
return;
|
||||
if (isAutoExpanding && textareaRef.current) {
|
||||
adjustHeight();
|
||||
}
|
||||
}, [value, isAutoExpanding, adjustHeight]);
|
||||
|
||||
if (fakeTextAreaElement && textAreaText !== value) {
|
||||
fakeTextAreaElement.innerText = value;
|
||||
}
|
||||
}, [placeholder, value]);
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (throttleTimeoutRef.current !== null) {
|
||||
clearTimeout(throttleTimeoutRef.current);
|
||||
}
|
||||
};
|
||||
}, []);
|
||||
|
||||
return isAutoExpanding ? (
|
||||
<>
|
||||
<div
|
||||
ref={fakeTextAreaRef}
|
||||
contentEditable="true"
|
||||
role="textbox"
|
||||
aria-multiline="true"
|
||||
className={classNames("block w-full rounded-md bg-white px-4 py-4 text-base text-gray-900 outline-1 -outline-offset-1 outline-gray-300 placeholder:text-gray-400 focus:outline-2 focus:-outline-offset-2 focus:outline-indigo-600", className)}
|
||||
onFocus={handleTextAreaFocus}
|
||||
onBlur={handleTextAreaBlur}
|
||||
/>
|
||||
</>
|
||||
) : (
|
||||
return (
|
||||
<textarea
|
||||
ref={isAutoExpanding ? textareaRef : undefined}
|
||||
name={name}
|
||||
style={style}
|
||||
value={value}
|
||||
placeholder={placeholder}
|
||||
className={classNames("block w-full rounded-md bg-white px-4 py-4 text-base text-gray-900 outline-1 -outline-offset-1 outline-gray-300 placeholder:text-gray-400 focus:outline-2 focus:-outline-offset-2 focus:outline-indigo-600", className)}
|
||||
onChange={handleChange}
|
||||
onKeyUp={onKeyUp}
|
||||
{...props}
|
||||
/>
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,12 @@
|
|||
import { redirect } from "next/navigation";
|
||||
|
||||
export default function handleServerErrors(response: Response, retry?: (response: Response) => Promise<Response>, useCloud?: boolean): Promise<Response> {
|
||||
export default function handleServerErrors(
|
||||
response: Response,
|
||||
retry: ((response: Response) => Promise<Response>) | null = null,
|
||||
useCloud: boolean = false,
|
||||
): Promise<Response> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (response.status === 401 && !useCloud) {
|
||||
if ((response.status === 401 || response.status === 403) && !useCloud) {
|
||||
if (retry) {
|
||||
return retry(response)
|
||||
.catch(() => {
|
||||
|
|
|
|||
|
|
@ -627,8 +627,7 @@ class TestModel:
|
|||
print(f"Failed: {failed}")
|
||||
print(f"Success Rate: {(passed / total_tests * 100):.1f}%")
|
||||
|
||||
if failed > 0:
|
||||
print(f"\n ⚠️ {failed} test(s) failed - review results above for details")
|
||||
assert failed == 0, f"\n ⚠️ {failed} test(s) failed - review results above for details"
|
||||
|
||||
|
||||
async def main():
|
||||
|
|
|
|||
4183
cognee-mcp/uv.lock
generated
4183
cognee-mcp/uv.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,3 +1,15 @@
|
|||
# ⚠️ DEPRECATED - Go to `new-examples/` Instead
|
||||
|
||||
This starter kit is deprecated. Its examples have been integrated into the `/new-examples/` folder.
|
||||
|
||||
| Old Location | New Location |
|
||||
|--------------|--------------|
|
||||
| `src/pipelines/default.py` | none |
|
||||
| `src/pipelines/low_level.py` | `new-examples/custom_pipelines/organizational_hierarchy/` |
|
||||
| `src/pipelines/custom-model.py` | `new-examples/demos/custom_graph_model_entity_schema_definition.py` |
|
||||
| `src/data/` | Included in `new-examples/custom_pipelines/organizational_hierarchy/data/` |
|
||||
|
||||
----------
|
||||
|
||||
# Cognee Starter Kit
|
||||
Welcome to the <a href="https://github.com/topoteretes/cognee">cognee</a> Starter Repo! This repository is designed to help you get started quickly by providing a structured dataset and pre-built data pipelines using cognee to build powerful knowledge graphs.
|
||||
|
|
|
|||
|
|
@ -10,13 +10,14 @@ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
|
|||
)
|
||||
from cognee.modules.engine.operations.setup import setup
|
||||
from cognee.tasks.ingestion import ingest_data, resolve_data_directories
|
||||
from cognee.tasks.ingestion.data_item import DataItem
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def add(
|
||||
data: Union[BinaryIO, list[BinaryIO], str, list[str]],
|
||||
data: Union[BinaryIO, list[BinaryIO], str, list[str], DataItem, list[DataItem]],
|
||||
dataset_name: str = "main_dataset",
|
||||
user: User = None,
|
||||
node_set: Optional[List[str]] = None,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,9 @@ from fastapi import status
|
|||
from fastapi import APIRouter
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from fastapi import HTTPException, Query, Depends
|
||||
from fastapi.responses import JSONResponse, FileResponse
|
||||
from fastapi.responses import JSONResponse, FileResponse, StreamingResponse
|
||||
from urllib.parse import urlparse
|
||||
from pathlib import Path
|
||||
|
||||
from cognee.api.DTO import InDTO, OutDTO
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
|
|
@ -44,6 +46,7 @@ class DatasetDTO(OutDTO):
|
|||
class DataDTO(OutDTO):
|
||||
id: UUID
|
||||
name: str
|
||||
label: Optional[str] = None
|
||||
created_at: datetime
|
||||
updated_at: Optional[datetime] = None
|
||||
extension: str
|
||||
|
|
@ -475,6 +478,40 @@ def get_datasets_router() -> APIRouter:
|
|||
message=f"Data ({data_id}) not found in dataset ({dataset_id})."
|
||||
)
|
||||
|
||||
return data.raw_data_location
|
||||
raw_location = data.raw_data_location
|
||||
|
||||
if raw_location.startswith("file://"):
|
||||
from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
|
||||
|
||||
raw_location = get_data_file_path(raw_location)
|
||||
|
||||
if raw_location.startswith("s3://"):
|
||||
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||
from cognee.infrastructure.utils.run_async import run_async
|
||||
|
||||
parsed = urlparse(raw_location)
|
||||
download_name = Path(parsed.path).name or data.name
|
||||
media_type = data.mime_type or "application/octet-stream"
|
||||
|
||||
async def file_iterator(chunk_size: int = 1024 * 1024):
|
||||
async with open_data_file(raw_location, mode="rb") as file:
|
||||
while True:
|
||||
chunk = await run_async(file.read, chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(
|
||||
file_iterator(),
|
||||
media_type=media_type,
|
||||
headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
|
||||
)
|
||||
|
||||
path = Path(raw_location)
|
||||
|
||||
if not path.is_file():
|
||||
raise DataNotFoundError(message=f"Raw file not found on disk for data ({data_id}).")
|
||||
|
||||
return FileResponse(path=path)
|
||||
|
||||
return router
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ def get_memify_router() -> APIRouter:
|
|||
dataset=payload.dataset_id if payload.dataset_id else payload.dataset_name,
|
||||
node_name=payload.node_name,
|
||||
user=user,
|
||||
run_in_background=payload.run_in_background,
|
||||
)
|
||||
|
||||
if isinstance(memify_run, PipelineRunErrored):
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import os
|
||||
import json
|
||||
import pydantic
|
||||
from typing import Union
|
||||
from functools import lru_cache
|
||||
|
|
@ -19,6 +20,7 @@ class RelationalConfig(BaseSettings):
|
|||
db_username: Union[str, None] = None # "cognee"
|
||||
db_password: Union[str, None] = None # "cognee"
|
||||
db_provider: str = "sqlite"
|
||||
database_connect_args: Union[str, None] = None
|
||||
|
||||
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
||||
|
||||
|
|
@ -30,6 +32,17 @@ class RelationalConfig(BaseSettings):
|
|||
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
|
||||
self.db_path = databases_directory_path
|
||||
|
||||
# Parse database_connect_args if provided as JSON string
|
||||
if self.database_connect_args and isinstance(self.database_connect_args, str):
|
||||
try:
|
||||
parsed_args = json.loads(self.database_connect_args)
|
||||
if isinstance(parsed_args, dict):
|
||||
self.database_connect_args = parsed_args
|
||||
else:
|
||||
self.database_connect_args = {}
|
||||
except json.JSONDecodeError:
|
||||
self.database_connect_args = {}
|
||||
|
||||
return self
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
|
|
@ -40,7 +53,8 @@ class RelationalConfig(BaseSettings):
|
|||
--------
|
||||
|
||||
- dict: A dictionary containing database configuration settings including db_path,
|
||||
db_name, db_host, db_port, db_username, db_password, and db_provider.
|
||||
db_name, db_host, db_port, db_username, db_password, db_provider, and
|
||||
database_connect_args.
|
||||
"""
|
||||
return {
|
||||
"db_path": self.db_path,
|
||||
|
|
@ -50,6 +64,7 @@ class RelationalConfig(BaseSettings):
|
|||
"db_username": self.db_username,
|
||||
"db_password": self.db_password,
|
||||
"db_provider": self.db_provider,
|
||||
"database_connect_args": self.database_connect_args,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
from sqlalchemy import URL
|
||||
from .sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
|
||||
from functools import lru_cache
|
||||
|
||||
|
|
@ -11,6 +12,7 @@ def create_relational_engine(
|
|||
db_username: str,
|
||||
db_password: str,
|
||||
db_provider: str,
|
||||
database_connect_args: dict = None,
|
||||
):
|
||||
"""
|
||||
Create a relational database engine based on the specified parameters.
|
||||
|
|
@ -29,6 +31,7 @@ def create_relational_engine(
|
|||
- db_password (str): The password for database authentication, required for
|
||||
PostgreSQL.
|
||||
- db_provider (str): The type of database provider (e.g., 'sqlite' or 'postgres').
|
||||
- database_connect_args (dict, optional): Database driver connection arguments.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
|
@ -43,12 +46,19 @@ def create_relational_engine(
|
|||
# Test if asyncpg is available
|
||||
import asyncpg
|
||||
|
||||
connection_string = (
|
||||
f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
|
||||
# Handle special characters in username and password like # or @
|
||||
connection_string = URL.create(
|
||||
"postgresql+asyncpg",
|
||||
username=db_username,
|
||||
password=db_password,
|
||||
host=db_host,
|
||||
port=int(db_port),
|
||||
database=db_name,
|
||||
)
|
||||
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"PostgreSQL dependencies are not installed. Please install with 'pip install cognee\"[postgres]\"' or 'pip install cognee\"[postgres-binary]\"' to use PostgreSQL functionality."
|
||||
)
|
||||
|
||||
return SQLAlchemyAdapter(connection_string)
|
||||
return SQLAlchemyAdapter(connection_string, connect_args=database_connect_args)
|
||||
|
|
|
|||
|
|
@ -29,10 +29,31 @@ class SQLAlchemyAdapter:
|
|||
functions.
|
||||
"""
|
||||
|
||||
def __init__(self, connection_string: str):
|
||||
def __init__(self, connection_string: str, connect_args: dict = None):
|
||||
"""
|
||||
Initialize the SQLAlchemy adapter with connection settings.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
connection_string (str): The database connection string (e.g., 'sqlite:///path/to/db'
|
||||
or 'postgresql://user:pass@host:port/db').
|
||||
connect_args (dict, optional): Database driver connection arguments.
|
||||
Configuration is loaded from RelationalConfig.database_connect_args, which reads
|
||||
from the DATABASE_CONNECT_ARGS environment variable.
|
||||
|
||||
Examples:
|
||||
PostgreSQL with SSL:
|
||||
DATABASE_CONNECT_ARGS='{"sslmode": "require", "connect_timeout": 10}'
|
||||
|
||||
SQLite with custom timeout:
|
||||
DATABASE_CONNECT_ARGS='{"timeout": 60}'
|
||||
"""
|
||||
self.db_path: str = None
|
||||
self.db_uri: str = connection_string
|
||||
|
||||
# Use provided connect_args (already parsed from config)
|
||||
final_connect_args = connect_args or {}
|
||||
|
||||
if "sqlite" in connection_string:
|
||||
[prefix, db_path] = connection_string.split("///")
|
||||
self.db_path = db_path
|
||||
|
|
@ -53,7 +74,7 @@ class SQLAlchemyAdapter:
|
|||
self.engine = create_async_engine(
|
||||
connection_string,
|
||||
poolclass=NullPool,
|
||||
connect_args={"timeout": 30},
|
||||
connect_args={**{"timeout": 30}, **final_connect_args},
|
||||
)
|
||||
else:
|
||||
self.engine = create_async_engine(
|
||||
|
|
@ -63,6 +84,7 @@ class SQLAlchemyAdapter:
|
|||
pool_recycle=280,
|
||||
pool_pre_ping=True,
|
||||
pool_timeout=280,
|
||||
connect_args=final_connect_args,
|
||||
)
|
||||
|
||||
self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
from sqlalchemy import URL
|
||||
|
||||
from .supported_databases import supported_databases
|
||||
from .embeddings import get_embedding_engine
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_context_config
|
||||
|
|
@ -66,8 +68,13 @@ def create_vector_engine(
|
|||
if not (db_host and db_port and db_name and db_username and db_password):
|
||||
raise EnvironmentError("Missing requred pgvector credentials!")
|
||||
|
||||
connection_string: str = (
|
||||
f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
|
||||
connection_string = URL.create(
|
||||
"postgresql+asyncpg",
|
||||
username=db_username,
|
||||
password=db_password,
|
||||
host=db_host,
|
||||
port=int(db_port),
|
||||
database=db_name,
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ from tenacity import (
|
|||
)
|
||||
import litellm
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
import httpx
|
||||
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
|
||||
from cognee.infrastructure.databases.exceptions import EmbeddingException
|
||||
from cognee.infrastructure.llm.tokenizer.HuggingFace import (
|
||||
|
|
@ -79,10 +81,26 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
|||
enable_mocking = str(enable_mocking).lower()
|
||||
self.mock = enable_mocking in ("true", "1", "yes")
|
||||
|
||||
# Validate provided custom embedding endpoint early to avoid long hangs later
|
||||
if self.endpoint:
|
||||
try:
|
||||
parsed = urlparse(self.endpoint)
|
||||
except Exception:
|
||||
parsed = None
|
||||
if not parsed or parsed.scheme not in ("http", "https") or not parsed.netloc:
|
||||
logger.error(
|
||||
"Invalid EMBEDDING_ENDPOINT configured: '%s'. Expected a URL starting with http:// or https://",
|
||||
str(self.endpoint),
|
||||
)
|
||||
raise EmbeddingException(
|
||||
"Invalid EMBEDDING_ENDPOINT. Please set a valid URL (e.g., https://host:port) "
|
||||
"via environment variable EMBEDDING_ENDPOINT."
|
||||
)
|
||||
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
stop=stop_after_delay(30),
|
||||
wait=wait_exponential_jitter(2, 128),
|
||||
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
|
||||
retry=retry_if_not_exception_type((litellm.exceptions.NotFoundError, EmbeddingException)),
|
||||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||
reraise=True,
|
||||
)
|
||||
|
|
@ -111,12 +129,16 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
|||
return [data["embedding"] for data in response["data"]]
|
||||
else:
|
||||
async with embedding_rate_limiter_context_manager():
|
||||
response = await litellm.aembedding(
|
||||
model=self.model,
|
||||
input=text,
|
||||
api_key=self.api_key,
|
||||
api_base=self.endpoint,
|
||||
api_version=self.api_version,
|
||||
# Ensure each attempt does not hang indefinitely
|
||||
response = await asyncio.wait_for(
|
||||
litellm.aembedding(
|
||||
model=self.model,
|
||||
input=text,
|
||||
api_key=self.api_key,
|
||||
api_base=self.endpoint,
|
||||
api_version=self.api_version,
|
||||
),
|
||||
timeout=30.0,
|
||||
)
|
||||
|
||||
return [data["embedding"] for data in response.data]
|
||||
|
|
@ -154,6 +176,27 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
|||
logger.error("Context window exceeded for embedding text: %s", str(error))
|
||||
raise error
|
||||
|
||||
except asyncio.TimeoutError as e:
|
||||
# Per-attempt timeout – likely an unreachable endpoint
|
||||
logger.error(
|
||||
"Embedding endpoint timed out. EMBEDDING_ENDPOINT='%s'. "
|
||||
"Verify that the endpoint is reachable and correct.",
|
||||
str(self.endpoint),
|
||||
)
|
||||
raise EmbeddingException(
|
||||
"Embedding request timed out. Check EMBEDDING_ENDPOINT connectivity."
|
||||
) from e
|
||||
|
||||
except (httpx.ConnectError, httpx.ReadTimeout) as e:
|
||||
logger.error(
|
||||
"Failed to connect to embedding endpoint. EMBEDDING_ENDPOINT='%s'. "
|
||||
"Ensure the URL is correct and the server is running.",
|
||||
str(self.endpoint),
|
||||
)
|
||||
raise EmbeddingException(
|
||||
"Cannot connect to embedding endpoint. Check EMBEDDING_ENDPOINT."
|
||||
) from e
|
||||
|
||||
except (
|
||||
litellm.exceptions.BadRequestError,
|
||||
litellm.exceptions.NotFoundError,
|
||||
|
|
@ -162,8 +205,15 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
|||
raise EmbeddingException(f"Failed to index data points using model {self.model}") from e
|
||||
|
||||
except Exception as error:
|
||||
logger.error("Error embedding text: %s", str(error))
|
||||
raise error
|
||||
# Fall back to a clear, actionable message for connectivity/misconfiguration issues
|
||||
logger.error(
|
||||
"Error embedding text: %s. EMBEDDING_ENDPOINT='%s'.",
|
||||
str(error),
|
||||
str(self.endpoint),
|
||||
)
|
||||
raise EmbeddingException(
|
||||
"Embedding failed due to an unexpected error. Verify EMBEDDING_ENDPOINT and provider settings."
|
||||
) from error
|
||||
|
||||
def get_vector_size(self) -> int:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -37,19 +37,6 @@ class LLMGateway:
|
|||
**kwargs,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def create_structured_output(
|
||||
text_input: str, system_prompt: str, response_model: Type[BaseModel]
|
||||
) -> BaseModel:
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
|
||||
get_llm_client,
|
||||
)
|
||||
|
||||
llm_client = get_llm_client()
|
||||
return llm_client.create_structured_output(
|
||||
text_input=text_input, system_prompt=system_prompt, response_model=response_model
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def create_transcript(input) -> Coroutine:
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@ from typing import Type
|
|||
from pydantic import BaseModel
|
||||
import litellm
|
||||
import instructor
|
||||
import anthropic
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.observability.get_observe import get_observe
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_delay,
|
||||
|
|
@ -12,38 +14,41 @@ from tenacity import (
|
|||
before_sleep_log,
|
||||
)
|
||||
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
||||
LLMInterface,
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
||||
GenericAPIAdapter,
|
||||
)
|
||||
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
||||
from cognee.infrastructure.llm.config import get_llm_config
|
||||
|
||||
logger = get_logger()
|
||||
observe = get_observe()
|
||||
|
||||
|
||||
class AnthropicAdapter(LLMInterface):
|
||||
class AnthropicAdapter(GenericAPIAdapter):
|
||||
"""
|
||||
Adapter for interfacing with the Anthropic API, enabling structured output generation
|
||||
and prompt display.
|
||||
"""
|
||||
|
||||
name = "Anthropic"
|
||||
model: str
|
||||
default_instructor_mode = "anthropic_tools"
|
||||
|
||||
def __init__(self, max_completion_tokens: int, model: str = None, instructor_mode: str = None):
|
||||
import anthropic
|
||||
|
||||
def __init__(
|
||||
self, api_key: str, model: str, max_completion_tokens: int, instructor_mode: str = None
|
||||
):
|
||||
super().__init__(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
name="Anthropic",
|
||||
)
|
||||
self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
|
||||
|
||||
self.aclient = instructor.patch(
|
||||
create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
|
||||
create=anthropic.AsyncAnthropic(api_key=self.api_key).messages.create,
|
||||
mode=instructor.Mode(self.instructor_mode),
|
||||
)
|
||||
|
||||
self.model = model
|
||||
self.max_completion_tokens = max_completion_tokens
|
||||
|
||||
@observe(as_type="generation")
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(8, 128),
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Adapter for Generic API LLM provider API"""
|
||||
"""Adapter for Gemini API LLM provider"""
|
||||
|
||||
import litellm
|
||||
import instructor
|
||||
|
|
@ -8,13 +8,9 @@ from openai import ContentFilterFinishReasonError
|
|||
from litellm.exceptions import ContentPolicyViolationError
|
||||
from instructor.core import InstructorRetryException
|
||||
|
||||
from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
||||
LLMInterface,
|
||||
)
|
||||
import logging
|
||||
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_delay,
|
||||
|
|
@ -23,55 +19,65 @@ from tenacity import (
|
|||
before_sleep_log,
|
||||
)
|
||||
|
||||
from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
||||
GenericAPIAdapter,
|
||||
)
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.observability.get_observe import get_observe
|
||||
|
||||
logger = get_logger()
|
||||
observe = get_observe()
|
||||
|
||||
|
||||
class GeminiAdapter(LLMInterface):
|
||||
class GeminiAdapter(GenericAPIAdapter):
|
||||
"""
|
||||
Adapter for Gemini API LLM provider.
|
||||
|
||||
This class initializes the API adapter with necessary credentials and configurations for
|
||||
interacting with the gemini LLM models. It provides methods for creating structured outputs
|
||||
based on user input and system prompts.
|
||||
based on user input and system prompts, as well as multimodal processing capabilities.
|
||||
|
||||
Public methods:
|
||||
- acreate_structured_output(text_input: str, system_prompt: str, response_model:
|
||||
Type[BaseModel]) -> BaseModel
|
||||
- acreate_structured_output(text_input: str, system_prompt: str, response_model: Type[BaseModel]) -> BaseModel
|
||||
- create_transcript(input) -> BaseModel: Transcribe audio files to text
|
||||
- transcribe_image(input) -> BaseModel: Inherited from GenericAPIAdapter
|
||||
"""
|
||||
|
||||
name: str
|
||||
model: str
|
||||
api_key: str
|
||||
default_instructor_mode = "json_mode"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
endpoint,
|
||||
api_key: str,
|
||||
model: str,
|
||||
api_version: str,
|
||||
max_completion_tokens: int,
|
||||
endpoint: str = None,
|
||||
api_version: str = None,
|
||||
transcription_model: str = None,
|
||||
instructor_mode: str = None,
|
||||
fallback_model: str = None,
|
||||
fallback_api_key: str = None,
|
||||
fallback_endpoint: str = None,
|
||||
):
|
||||
self.model = model
|
||||
self.api_key = api_key
|
||||
self.endpoint = endpoint
|
||||
self.api_version = api_version
|
||||
self.max_completion_tokens = max_completion_tokens
|
||||
|
||||
self.fallback_model = fallback_model
|
||||
self.fallback_api_key = fallback_api_key
|
||||
self.fallback_endpoint = fallback_endpoint
|
||||
|
||||
super().__init__(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
name="Gemini",
|
||||
endpoint=endpoint,
|
||||
api_version=api_version,
|
||||
transcription_model=transcription_model,
|
||||
fallback_model=fallback_model,
|
||||
fallback_api_key=fallback_api_key,
|
||||
fallback_endpoint=fallback_endpoint,
|
||||
)
|
||||
self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
|
||||
|
||||
self.aclient = instructor.from_litellm(
|
||||
litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
|
||||
)
|
||||
|
||||
@observe(as_type="generation")
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(8, 128),
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
"""Adapter for Generic API LLM provider API"""
|
||||
|
||||
import base64
|
||||
import mimetypes
|
||||
import litellm
|
||||
import instructor
|
||||
from typing import Type
|
||||
from typing import Type, Optional
|
||||
from pydantic import BaseModel
|
||||
from openai import ContentFilterFinishReasonError
|
||||
from litellm.exceptions import ContentPolicyViolationError
|
||||
|
|
@ -12,6 +14,8 @@ from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
|
|||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
||||
LLMInterface,
|
||||
)
|
||||
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||
from cognee.modules.observability.get_observe import get_observe
|
||||
import logging
|
||||
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
|
@ -23,7 +27,12 @@ from tenacity import (
|
|||
before_sleep_log,
|
||||
)
|
||||
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.types import (
|
||||
TranscriptionReturnType,
|
||||
)
|
||||
|
||||
logger = get_logger()
|
||||
observe = get_observe()
|
||||
|
||||
|
||||
class GenericAPIAdapter(LLMInterface):
|
||||
|
|
@ -39,18 +48,19 @@ class GenericAPIAdapter(LLMInterface):
|
|||
Type[BaseModel]) -> BaseModel
|
||||
"""
|
||||
|
||||
name: str
|
||||
model: str
|
||||
api_key: str
|
||||
MAX_RETRIES = 5
|
||||
default_instructor_mode = "json_mode"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
endpoint,
|
||||
api_key: str,
|
||||
model: str,
|
||||
name: str,
|
||||
max_completion_tokens: int,
|
||||
name: str,
|
||||
endpoint: str = None,
|
||||
api_version: str = None,
|
||||
transcription_model: str = None,
|
||||
image_transcribe_model: str = None,
|
||||
instructor_mode: str = None,
|
||||
fallback_model: str = None,
|
||||
fallback_api_key: str = None,
|
||||
|
|
@ -59,9 +69,11 @@ class GenericAPIAdapter(LLMInterface):
|
|||
self.name = name
|
||||
self.model = model
|
||||
self.api_key = api_key
|
||||
self.api_version = api_version
|
||||
self.endpoint = endpoint
|
||||
self.max_completion_tokens = max_completion_tokens
|
||||
|
||||
self.transcription_model = transcription_model or model
|
||||
self.image_transcribe_model = image_transcribe_model or model
|
||||
self.fallback_model = fallback_model
|
||||
self.fallback_api_key = fallback_api_key
|
||||
self.fallback_endpoint = fallback_endpoint
|
||||
|
|
@ -72,6 +84,7 @@ class GenericAPIAdapter(LLMInterface):
|
|||
litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
|
||||
)
|
||||
|
||||
@observe(as_type="generation")
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(8, 128),
|
||||
|
|
@ -173,3 +186,115 @@ class GenericAPIAdapter(LLMInterface):
|
|||
raise ContentPolicyFilterError(
|
||||
f"The provided input contains content that is not aligned with our content policy: {text_input}"
|
||||
) from error
|
||||
|
||||
@observe(as_type="transcription")
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(2, 128),
|
||||
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
|
||||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||
reraise=True,
|
||||
)
|
||||
async def create_transcript(self, input) -> TranscriptionReturnType:
|
||||
"""
|
||||
Generate an audio transcript from a user query.
|
||||
|
||||
This method creates a transcript from the specified audio file, raising a
|
||||
FileNotFoundError if the file does not exist. The audio file is processed and the
|
||||
transcription is retrieved from the API.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
- input: The path to the audio file that needs to be transcribed.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
The generated transcription of the audio file.
|
||||
"""
|
||||
async with open_data_file(input, mode="rb") as audio_file:
|
||||
encoded_string = base64.b64encode(audio_file.read()).decode("utf-8")
|
||||
mime_type, _ = mimetypes.guess_type(input)
|
||||
if not mime_type or not mime_type.startswith("audio/"):
|
||||
raise ValueError(
|
||||
f"Could not determine MIME type for audio file: {input}. Is the extension correct?"
|
||||
)
|
||||
response = await litellm.acompletion(
|
||||
model=self.transcription_model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "file",
|
||||
"file": {"file_data": f"data:{mime_type};base64,{encoded_string}"},
|
||||
},
|
||||
{"type": "text", "text": "Transcribe the following audio precisely."},
|
||||
],
|
||||
}
|
||||
],
|
||||
api_key=self.api_key,
|
||||
api_version=self.api_version,
|
||||
max_completion_tokens=self.max_completion_tokens,
|
||||
api_base=self.endpoint,
|
||||
max_retries=self.MAX_RETRIES,
|
||||
)
|
||||
|
||||
return TranscriptionReturnType(response.choices[0].message.content, response)
|
||||
|
||||
@observe(as_type="transcribe_image")
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(2, 128),
|
||||
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
|
||||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||
reraise=True,
|
||||
)
|
||||
async def transcribe_image(self, input) -> BaseModel:
|
||||
"""
|
||||
Generate a transcription of an image from a user query.
|
||||
|
||||
This method encodes the image and sends a request to the API to obtain a
|
||||
description of the contents of the image.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
- input: The path to the image file that needs to be transcribed.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
- BaseModel: A structured output generated by the model, returned as an instance of
|
||||
BaseModel.
|
||||
"""
|
||||
async with open_data_file(input, mode="rb") as image_file:
|
||||
encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
|
||||
mime_type, _ = mimetypes.guess_type(input)
|
||||
if not mime_type or not mime_type.startswith("image/"):
|
||||
raise ValueError(
|
||||
f"Could not determine MIME type for image file: {input}. Is the extension correct?"
|
||||
)
|
||||
response = await litellm.acompletion(
|
||||
model=self.image_transcribe_model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What's in this image?",
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:{mime_type};base64,{encoded_image}",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
api_key=self.api_key,
|
||||
api_base=self.endpoint,
|
||||
api_version=self.api_version,
|
||||
max_completion_tokens=300,
|
||||
max_retries=self.MAX_RETRIES,
|
||||
)
|
||||
return response
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ class LLMProvider(Enum):
|
|||
GEMINI = "gemini"
|
||||
MISTRAL = "mistral"
|
||||
BEDROCK = "bedrock"
|
||||
LLAMA_CPP = "llama_cpp"
|
||||
|
||||
|
||||
def get_llm_client(raise_api_key_error: bool = True):
|
||||
|
|
@ -103,7 +104,7 @@ def get_llm_client(raise_api_key_error: bool = True):
|
|||
llm_config.llm_api_key,
|
||||
llm_config.llm_model,
|
||||
"Ollama",
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
max_completion_tokens,
|
||||
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
||||
)
|
||||
|
||||
|
|
@ -113,8 +114,9 @@ def get_llm_client(raise_api_key_error: bool = True):
|
|||
)
|
||||
|
||||
return AnthropicAdapter(
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
model=llm_config.llm_model,
|
||||
llm_config.llm_api_key,
|
||||
llm_config.llm_model,
|
||||
max_completion_tokens,
|
||||
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
||||
)
|
||||
|
||||
|
|
@ -127,11 +129,10 @@ def get_llm_client(raise_api_key_error: bool = True):
|
|||
)
|
||||
|
||||
return GenericAPIAdapter(
|
||||
llm_config.llm_endpoint,
|
||||
llm_config.llm_api_key,
|
||||
llm_config.llm_model,
|
||||
max_completion_tokens,
|
||||
"Custom",
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
||||
fallback_api_key=llm_config.fallback_api_key,
|
||||
fallback_endpoint=llm_config.fallback_endpoint,
|
||||
|
|
@ -187,5 +188,27 @@ def get_llm_client(raise_api_key_error: bool = True):
|
|||
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
||||
)
|
||||
|
||||
elif provider == LLMProvider.LLAMA_CPP:
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llama_cpp.adapter import (
|
||||
LlamaCppAPIAdapter,
|
||||
)
|
||||
|
||||
# Get optional local mode parameters (will be None if not set)
|
||||
model_path = getattr(llm_config, "llama_cpp_model_path", None)
|
||||
n_ctx = getattr(llm_config, "llama_cpp_n_ctx", 2048)
|
||||
n_gpu_layers = getattr(llm_config, "llama_cpp_n_gpu_layers", 0)
|
||||
chat_format = getattr(llm_config, "llama_cpp_chat_format", "chatml")
|
||||
|
||||
return LlamaCppAPIAdapter(
|
||||
model=llm_config.llm_model,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
instructor_mode=llm_config.llm_instructor_mode.lower(),
|
||||
endpoint=llm_config.llm_endpoint,
|
||||
api_key=llm_config.llm_api_key,
|
||||
model_path=model_path,
|
||||
n_ctx=n_ctx,
|
||||
n_gpu_layers=n_gpu_layers,
|
||||
chat_format=chat_format,
|
||||
)
|
||||
else:
|
||||
raise UnsupportedLLMProviderError(provider)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,191 @@
|
|||
"""Adapter for Instructor-backed Structured Output Framework for Llama CPP"""
|
||||
|
||||
import litellm
|
||||
import logging
|
||||
import instructor
|
||||
from typing import Type, Optional
|
||||
from openai import AsyncOpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
||||
LLMInterface,
|
||||
)
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_delay,
|
||||
wait_exponential_jitter,
|
||||
retry_if_not_exception_type,
|
||||
before_sleep_log,
|
||||
)
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
class LlamaCppAPIAdapter(LLMInterface):
|
||||
"""
|
||||
Adapter for Llama CPP LLM provider with support for TWO modes:
|
||||
|
||||
1. SERVER MODE (OpenAI-compatible):
|
||||
- Connects to llama-cpp-python server via HTTP (local or remote)
|
||||
- Uses instructor.from_openai()
|
||||
- Requires: endpoint, api_key, model
|
||||
|
||||
2. LOCAL MODE (In-process):
|
||||
- Loads model directly using llama-cpp-python library
|
||||
- Uses instructor.patch() on llama.Llama object
|
||||
- Requires: model_path
|
||||
|
||||
Public methods:
|
||||
- acreate_structured_output
|
||||
|
||||
Instance variables:
|
||||
- name
|
||||
- model (for server mode) or model_path (for local mode)
|
||||
- mode_type: "server" or "local"
|
||||
- max_completion_tokens
|
||||
- aclient
|
||||
"""
|
||||
|
||||
name: str
|
||||
model: Optional[str]
|
||||
model_path: Optional[str]
|
||||
mode_type: str # "server" or "local"
|
||||
default_instructor_mode = instructor.Mode.JSON
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str = "LlamaCpp",
|
||||
max_completion_tokens: int = 2048,
|
||||
instructor_mode: Optional[str] = None,
|
||||
# Server mode parameters
|
||||
endpoint: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
model: Optional[str] = None,
|
||||
# Local mode parameters
|
||||
model_path: Optional[str] = None,
|
||||
n_ctx: int = 2048,
|
||||
n_gpu_layers: int = 0,
|
||||
chat_format: str = "chatml",
|
||||
):
|
||||
self.name = name
|
||||
self.max_completion_tokens = max_completion_tokens
|
||||
self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
|
||||
|
||||
# Determine which mode to use
|
||||
if model_path:
|
||||
self._init_local_mode(model_path, n_ctx, n_gpu_layers, chat_format)
|
||||
elif endpoint:
|
||||
self._init_server_mode(endpoint, api_key, model)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Must provide either 'model_path' (for local mode) or 'endpoint' (for server mode)"
|
||||
)
|
||||
|
||||
def _init_local_mode(self, model_path: str, n_ctx: int, n_gpu_layers: int, chat_format: str):
|
||||
"""Initialize local mode using llama-cpp-python library directly"""
|
||||
try:
|
||||
import llama_cpp
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"llama-cpp-python is not installed. Install with: pip install llama-cpp-python"
|
||||
)
|
||||
|
||||
logger.info(f"Initializing LlamaCpp in LOCAL mode with model: {model_path}")
|
||||
|
||||
self.mode_type = "local"
|
||||
self.model_path = model_path
|
||||
self.model = None
|
||||
|
||||
# Initialize llama-cpp-python with the model
|
||||
self.llama = llama_cpp.Llama(
|
||||
model_path=model_path,
|
||||
n_gpu_layers=n_gpu_layers, # -1 for all GPU, 0 for CPU only
|
||||
chat_format=chat_format,
|
||||
n_ctx=n_ctx,
|
||||
verbose=False,
|
||||
)
|
||||
|
||||
self.aclient = instructor.patch(
|
||||
create=self.llama.create_chat_completion_openai_v1,
|
||||
mode=instructor.Mode(self.instructor_mode),
|
||||
)
|
||||
|
||||
def _init_server_mode(self, endpoint: str, api_key: Optional[str], model: Optional[str]):
|
||||
"""Initialize server mode connecting to llama-cpp-python server"""
|
||||
logger.info(f"Initializing LlamaCpp in SERVER mode with endpoint: {endpoint}")
|
||||
|
||||
self.mode_type = "server"
|
||||
self.model = model
|
||||
self.model_path = None
|
||||
self.endpoint = endpoint
|
||||
self.api_key = api_key
|
||||
|
||||
# Use instructor.from_openai() for server mode (OpenAI-compatible API)
|
||||
self.aclient = instructor.from_openai(
|
||||
AsyncOpenAI(base_url=self.endpoint, api_key=self.api_key),
|
||||
mode=instructor.Mode(self.instructor_mode),
|
||||
)
|
||||
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(8, 128),
|
||||
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
|
||||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||
reraise=True,
|
||||
)
|
||||
async def acreate_structured_output(
|
||||
self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
|
||||
) -> BaseModel:
|
||||
"""
|
||||
Generate a structured output from the LLM using the provided text and system prompt.
|
||||
|
||||
Works in both local and server modes transparently.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
- text_input (str): The input text provided by the user.
|
||||
- system_prompt (str): The system prompt that guides the response generation.
|
||||
- response_model (Type[BaseModel]): The model type that the response should conform to.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
- BaseModel: A structured output that conforms to the specified response model.
|
||||
"""
|
||||
async with llm_rate_limiter_context_manager():
|
||||
# Prepare messages (system first, then user is more standard)
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": text_input},
|
||||
]
|
||||
|
||||
if self.mode_type == "server":
|
||||
# Server mode: use async client with OpenAI-compatible API
|
||||
response = await self.aclient.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
response_model=response_model,
|
||||
max_retries=2,
|
||||
max_completion_tokens=self.max_completion_tokens,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
else:
|
||||
import asyncio
|
||||
|
||||
# Local mode: instructor.patch() returns a SYNC callable
|
||||
# Per docs: https://python.useinstructor.com/integrations/llama-cpp-python/
|
||||
def _call_sync():
|
||||
return self.aclient(
|
||||
messages=messages,
|
||||
response_model=response_model,
|
||||
max_tokens=self.max_completion_tokens,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Run sync function in thread pool to avoid blocking
|
||||
response = await asyncio.to_thread(_call_sync)
|
||||
|
||||
return response
|
||||
|
|
@ -3,18 +3,14 @@
|
|||
from typing import Type, Protocol
|
||||
from abc import abstractmethod
|
||||
from pydantic import BaseModel
|
||||
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
||||
|
||||
|
||||
class LLMInterface(Protocol):
|
||||
"""
|
||||
Define an interface for LLM models with methods for structured output and prompt
|
||||
display.
|
||||
Define an interface for LLM models with methods for structured output, multimodal processing, and prompt display.
|
||||
|
||||
Methods:
|
||||
- acreate_structured_output(text_input: str, system_prompt: str, response_model:
|
||||
Type[BaseModel])
|
||||
- show_prompt(text_input: str, system_prompt: str)
|
||||
- acreate_structured_output(text_input: str, system_prompt: str, response_model: Type[BaseModel])
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
import litellm
|
||||
import instructor
|
||||
from pydantic import BaseModel
|
||||
from typing import Type
|
||||
from typing import Type, Optional
|
||||
from litellm import JSONSchemaValidationError
|
||||
|
||||
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.observability.get_observe import get_observe
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
||||
LLMInterface,
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
||||
GenericAPIAdapter,
|
||||
)
|
||||
from cognee.infrastructure.llm.config import get_llm_config
|
||||
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
||||
|
|
@ -20,12 +20,14 @@ from tenacity import (
|
|||
retry_if_not_exception_type,
|
||||
before_sleep_log,
|
||||
)
|
||||
from ..types import TranscriptionReturnType
|
||||
from mistralai import Mistral
|
||||
|
||||
logger = get_logger()
|
||||
observe = get_observe()
|
||||
|
||||
|
||||
class MistralAdapter(LLMInterface):
|
||||
class MistralAdapter(GenericAPIAdapter):
|
||||
"""
|
||||
Adapter for Mistral AI API, for structured output generation and prompt display.
|
||||
|
||||
|
|
@ -34,10 +36,6 @@ class MistralAdapter(LLMInterface):
|
|||
- show_prompt
|
||||
"""
|
||||
|
||||
name = "Mistral"
|
||||
model: str
|
||||
api_key: str
|
||||
max_completion_tokens: int
|
||||
default_instructor_mode = "mistral_tools"
|
||||
|
||||
def __init__(
|
||||
|
|
@ -46,12 +44,19 @@ class MistralAdapter(LLMInterface):
|
|||
model: str,
|
||||
max_completion_tokens: int,
|
||||
endpoint: str = None,
|
||||
transcription_model: str = None,
|
||||
image_transcribe_model: str = None,
|
||||
instructor_mode: str = None,
|
||||
):
|
||||
from mistralai import Mistral
|
||||
|
||||
self.model = model
|
||||
self.max_completion_tokens = max_completion_tokens
|
||||
super().__init__(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
name="Mistral",
|
||||
endpoint=endpoint,
|
||||
transcription_model=transcription_model,
|
||||
image_transcribe_model=image_transcribe_model,
|
||||
)
|
||||
|
||||
self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
|
||||
|
||||
|
|
@ -60,7 +65,9 @@ class MistralAdapter(LLMInterface):
|
|||
mode=instructor.Mode(self.instructor_mode),
|
||||
api_key=get_llm_config().llm_api_key,
|
||||
)
|
||||
self.mistral_client = Mistral(api_key=self.api_key)
|
||||
|
||||
@observe(as_type="generation")
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(8, 128),
|
||||
|
|
@ -119,3 +126,41 @@ class MistralAdapter(LLMInterface):
|
|||
logger.error(f"Schema validation failed: {str(e)}")
|
||||
logger.debug(f"Raw response: {e.raw_response}")
|
||||
raise ValueError(f"Response failed schema validation: {str(e)}")
|
||||
|
||||
@observe(as_type="transcription")
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(2, 128),
|
||||
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
|
||||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||
reraise=True,
|
||||
)
|
||||
async def create_transcript(self, input) -> Optional[TranscriptionReturnType]:
|
||||
"""
|
||||
Generate an audio transcript from a user query.
|
||||
|
||||
This method creates a transcript from the specified audio file.
|
||||
The audio file is processed and the transcription is retrieved from the API.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
- input: The path to the audio file that needs to be transcribed.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
The generated transcription of the audio file.
|
||||
"""
|
||||
transcription_model = self.transcription_model
|
||||
if self.transcription_model.startswith("mistral"):
|
||||
transcription_model = self.transcription_model.split("/")[-1]
|
||||
file_name = input.split("/")[-1]
|
||||
async with open_data_file(input, mode="rb") as f:
|
||||
transcription_response = self.mistral_client.audio.transcriptions.complete(
|
||||
model=transcription_model,
|
||||
file={
|
||||
"content": f,
|
||||
"file_name": file_name,
|
||||
},
|
||||
)
|
||||
|
||||
return TranscriptionReturnType(transcription_response.text, transcription_response)
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
|
|||
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_delay,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
import base64
|
||||
import litellm
|
||||
import instructor
|
||||
from typing import Type
|
||||
|
|
@ -16,8 +15,8 @@ from tenacity import (
|
|||
before_sleep_log,
|
||||
)
|
||||
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
|
||||
LLMInterface,
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
|
||||
GenericAPIAdapter,
|
||||
)
|
||||
from cognee.infrastructure.llm.exceptions import (
|
||||
ContentPolicyFilterError,
|
||||
|
|
@ -26,13 +25,16 @@ from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
|
|||
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||
from cognee.modules.observability.get_observe import get_observe
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.types import (
|
||||
TranscriptionReturnType,
|
||||
)
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
observe = get_observe()
|
||||
|
||||
|
||||
class OpenAIAdapter(LLMInterface):
|
||||
class OpenAIAdapter(GenericAPIAdapter):
|
||||
"""
|
||||
Adapter for OpenAI's GPT-3, GPT-4 API.
|
||||
|
||||
|
|
@ -53,12 +55,7 @@ class OpenAIAdapter(LLMInterface):
|
|||
- MAX_RETRIES
|
||||
"""
|
||||
|
||||
name = "OpenAI"
|
||||
model: str
|
||||
api_key: str
|
||||
api_version: str
|
||||
default_instructor_mode = "json_schema_mode"
|
||||
|
||||
MAX_RETRIES = 5
|
||||
|
||||
"""Adapter for OpenAI's GPT-3, GPT=4 API"""
|
||||
|
|
@ -66,17 +63,29 @@ class OpenAIAdapter(LLMInterface):
|
|||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
endpoint: str,
|
||||
api_version: str,
|
||||
model: str,
|
||||
transcription_model: str,
|
||||
max_completion_tokens: int,
|
||||
endpoint: str = None,
|
||||
api_version: str = None,
|
||||
transcription_model: str = None,
|
||||
instructor_mode: str = None,
|
||||
streaming: bool = False,
|
||||
fallback_model: str = None,
|
||||
fallback_api_key: str = None,
|
||||
fallback_endpoint: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
name="OpenAI",
|
||||
endpoint=endpoint,
|
||||
api_version=api_version,
|
||||
transcription_model=transcription_model,
|
||||
fallback_model=fallback_model,
|
||||
fallback_api_key=fallback_api_key,
|
||||
fallback_endpoint=fallback_endpoint,
|
||||
)
|
||||
self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
|
||||
# TODO: With gpt5 series models OpenAI expects JSON_SCHEMA as a mode for structured outputs.
|
||||
# Make sure all new gpt models will work with this mode as well.
|
||||
|
|
@ -91,18 +100,8 @@ class OpenAIAdapter(LLMInterface):
|
|||
self.aclient = instructor.from_litellm(litellm.acompletion)
|
||||
self.client = instructor.from_litellm(litellm.completion)
|
||||
|
||||
self.transcription_model = transcription_model
|
||||
self.model = model
|
||||
self.api_key = api_key
|
||||
self.endpoint = endpoint
|
||||
self.api_version = api_version
|
||||
self.max_completion_tokens = max_completion_tokens
|
||||
self.streaming = streaming
|
||||
|
||||
self.fallback_model = fallback_model
|
||||
self.fallback_api_key = fallback_api_key
|
||||
self.fallback_endpoint = fallback_endpoint
|
||||
|
||||
@observe(as_type="generation")
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
|
|
@ -198,7 +197,7 @@ class OpenAIAdapter(LLMInterface):
|
|||
f"The provided input contains content that is not aligned with our content policy: {text_input}"
|
||||
) from error
|
||||
|
||||
@observe
|
||||
@observe(as_type="transcription")
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(2, 128),
|
||||
|
|
@ -206,58 +205,7 @@ class OpenAIAdapter(LLMInterface):
|
|||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||
reraise=True,
|
||||
)
|
||||
def create_structured_output(
|
||||
self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
|
||||
) -> BaseModel:
|
||||
"""
|
||||
Generate a response from a user query.
|
||||
|
||||
This method creates structured output by sending a synchronous request to the OpenAI API
|
||||
using the provided parameters to generate a completion based on the user input and
|
||||
system prompt.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- text_input (str): The input text provided by the user for generating a response.
|
||||
- system_prompt (str): The system's prompt to guide the model's response.
|
||||
- response_model (Type[BaseModel]): The expected model type for the response.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
- BaseModel: A structured output generated by the model, returned as an instance of
|
||||
BaseModel.
|
||||
"""
|
||||
|
||||
return self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"""{text_input}""",
|
||||
},
|
||||
{
|
||||
"role": "system",
|
||||
"content": system_prompt,
|
||||
},
|
||||
],
|
||||
api_key=self.api_key,
|
||||
api_base=self.endpoint,
|
||||
api_version=self.api_version,
|
||||
response_model=response_model,
|
||||
max_retries=self.MAX_RETRIES,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(2, 128),
|
||||
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
|
||||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||
reraise=True,
|
||||
)
|
||||
async def create_transcript(self, input, **kwargs):
|
||||
async def create_transcript(self, input, **kwargs) -> TranscriptionReturnType:
|
||||
"""
|
||||
Generate an audio transcript from a user query.
|
||||
|
||||
|
|
@ -286,60 +234,6 @@ class OpenAIAdapter(LLMInterface):
|
|||
max_retries=self.MAX_RETRIES,
|
||||
**kwargs,
|
||||
)
|
||||
return TranscriptionReturnType(transcription.text, transcription)
|
||||
|
||||
return transcription
|
||||
|
||||
@retry(
|
||||
stop=stop_after_delay(128),
|
||||
wait=wait_exponential_jitter(2, 128),
|
||||
retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
|
||||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||
reraise=True,
|
||||
)
|
||||
async def transcribe_image(self, input, **kwargs) -> BaseModel:
|
||||
"""
|
||||
Generate a transcription of an image from a user query.
|
||||
|
||||
This method encodes the image and sends a request to the OpenAI API to obtain a
|
||||
description of the contents of the image.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- input: The path to the image file that needs to be transcribed.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
- BaseModel: A structured output generated by the model, returned as an instance of
|
||||
BaseModel.
|
||||
"""
|
||||
async with open_data_file(input, mode="rb") as image_file:
|
||||
encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
|
||||
|
||||
return litellm.completion(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What's in this image?",
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{encoded_image}",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
api_key=self.api_key,
|
||||
api_base=self.endpoint,
|
||||
api_version=self.api_version,
|
||||
max_completion_tokens=300,
|
||||
max_retries=self.MAX_RETRIES,
|
||||
**kwargs,
|
||||
)
|
||||
# transcribe_image is inherited from GenericAPIAdapter
|
||||
|
|
|
|||
|
|
@ -0,0 +1,10 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class TranscriptionReturnType:
|
||||
text: str
|
||||
payload: BaseModel
|
||||
|
||||
def __init__(self, text: str, payload: BaseModel):
|
||||
self.text = text
|
||||
self.payload = payload
|
||||
|
|
@ -33,5 +33,4 @@ class DocumentChunk(DataPoint):
|
|||
cut_type: str
|
||||
is_part_of: Document
|
||||
contains: List[Union[Entity, Event, tuple[Edge, Entity]]] = None
|
||||
|
||||
metadata: dict = {"index_fields": ["text"]}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ class Data(Base):
|
|||
__tablename__ = "data"
|
||||
|
||||
id = Column(UUID, primary_key=True, default=uuid4)
|
||||
|
||||
label = Column(String, nullable=True)
|
||||
name = Column(String)
|
||||
extension = Column(String)
|
||||
mime_type = Column(String)
|
||||
|
|
@ -36,6 +36,7 @@ class Data(Base):
|
|||
data_size = Column(Integer, nullable=True) # File size in bytes
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
|
||||
last_accessed = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
datasets = relationship(
|
||||
"Dataset",
|
||||
|
|
@ -49,6 +50,7 @@ class Data(Base):
|
|||
return {
|
||||
"id": str(self.id),
|
||||
"name": self.name,
|
||||
"label": self.label,
|
||||
"extension": self.extension,
|
||||
"mimeType": self.mime_type,
|
||||
"rawDataLocation": self.raw_data_location,
|
||||
|
|
|
|||
|
|
@ -7,5 +7,4 @@ class Entity(DataPoint):
|
|||
name: str
|
||||
is_a: Optional[EntityType] = None
|
||||
description: str
|
||||
|
||||
metadata: dict = {"index_fields": ["name"]}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import time
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from typing import List, Dict, Union, Optional, Type
|
||||
from typing import List, Dict, Union, Optional, Type, Iterable, Tuple, Callable, Any
|
||||
|
||||
from cognee.modules.graph.exceptions import (
|
||||
EntityNotFoundError,
|
||||
|
|
@ -25,12 +25,16 @@ class CogneeGraph(CogneeAbstractGraph):
|
|||
|
||||
nodes: Dict[str, Node]
|
||||
edges: List[Edge]
|
||||
edges_by_distance_key: Dict[str, List[Edge]]
|
||||
directed: bool
|
||||
triplet_distance_penalty: float
|
||||
|
||||
def __init__(self, directed: bool = True):
|
||||
self.nodes = {}
|
||||
self.edges = []
|
||||
self.edges_by_distance_key = {}
|
||||
self.directed = directed
|
||||
self.triplet_distance_penalty = 3.5
|
||||
|
||||
def add_node(self, node: Node) -> None:
|
||||
if node.id not in self.nodes:
|
||||
|
|
@ -42,6 +46,12 @@ class CogneeGraph(CogneeAbstractGraph):
|
|||
self.edges.append(edge)
|
||||
edge.node1.add_skeleton_edge(edge)
|
||||
edge.node2.add_skeleton_edge(edge)
|
||||
key = edge.get_distance_key()
|
||||
if not key:
|
||||
return
|
||||
if key not in self.edges_by_distance_key:
|
||||
self.edges_by_distance_key[key] = []
|
||||
self.edges_by_distance_key[key].append(edge)
|
||||
|
||||
def get_node(self, node_id: str) -> Node:
|
||||
return self.nodes.get(node_id, None)
|
||||
|
|
@ -56,6 +66,29 @@ class CogneeGraph(CogneeAbstractGraph):
|
|||
def get_edges(self) -> List[Edge]:
|
||||
return self.edges
|
||||
|
||||
def reset_distances(self, collection: Iterable[Union[Node, Edge]], query_count: int) -> None:
|
||||
"""Reset vector distances for a collection of nodes or edges."""
|
||||
for item in collection:
|
||||
item.reset_vector_distances(query_count, self.triplet_distance_penalty)
|
||||
|
||||
def _normalize_query_distance_lists(
|
||||
self, distances: List, query_list_length: Optional[int] = None, name: str = "distances"
|
||||
) -> List:
|
||||
"""Normalize shape: flat list -> single-query; nested list -> multi-query."""
|
||||
if not distances:
|
||||
return []
|
||||
first_item = distances[0]
|
||||
if isinstance(first_item, (list, tuple)):
|
||||
per_query_lists = distances
|
||||
else:
|
||||
per_query_lists = [distances]
|
||||
if query_list_length is not None and len(per_query_lists) != query_list_length:
|
||||
raise ValueError(
|
||||
f"{name} has {len(per_query_lists)} query lists, "
|
||||
f"but query_list_length is {query_list_length}"
|
||||
)
|
||||
return per_query_lists
|
||||
|
||||
async def _get_nodeset_subgraph(
|
||||
self,
|
||||
adapter,
|
||||
|
|
@ -148,7 +181,7 @@ class CogneeGraph(CogneeAbstractGraph):
|
|||
adapter, memory_fragment_filter
|
||||
)
|
||||
|
||||
import time
|
||||
self.triplet_distance_penalty = triplet_distance_penalty
|
||||
|
||||
start_time = time.time()
|
||||
# Process nodes
|
||||
|
|
@ -200,41 +233,123 @@ class CogneeGraph(CogneeAbstractGraph):
|
|||
logger.error(f"Error during graph projection: {str(e)}")
|
||||
raise
|
||||
|
||||
async def map_vector_distances_to_graph_nodes(self, node_distances) -> None:
|
||||
mapped_nodes = 0
|
||||
for category, scored_results in node_distances.items():
|
||||
for scored_result in scored_results:
|
||||
node_id = str(scored_result.id)
|
||||
score = scored_result.score
|
||||
node = self.get_node(node_id)
|
||||
if node:
|
||||
node.add_attribute("vector_distance", score)
|
||||
mapped_nodes += 1
|
||||
async def map_vector_distances_to_graph_nodes(
|
||||
self,
|
||||
node_distances,
|
||||
query_list_length: Optional[int] = None,
|
||||
) -> None:
|
||||
"""Map vector distances to nodes, supporting single- and multi-query input shapes."""
|
||||
|
||||
async def map_vector_distances_to_graph_edges(self, edge_distances) -> None:
|
||||
try:
|
||||
if edge_distances is None:
|
||||
return
|
||||
query_count = query_list_length or 1
|
||||
|
||||
embedding_map = {result.payload["text"]: result.score for result in edge_distances}
|
||||
self.reset_distances(self.nodes.values(), query_count)
|
||||
|
||||
for edge in self.edges:
|
||||
edge_key = edge.attributes.get("edge_text") or edge.attributes.get(
|
||||
"relationship_type"
|
||||
)
|
||||
distance = embedding_map.get(edge_key, None)
|
||||
if distance is not None:
|
||||
edge.attributes["vector_distance"] = distance
|
||||
for collection_name, scored_results in node_distances.items():
|
||||
if not scored_results:
|
||||
continue
|
||||
|
||||
except Exception as ex:
|
||||
logger.error(f"Error mapping vector distances to edges: {str(ex)}")
|
||||
raise ex
|
||||
per_query_scored_results = self._normalize_query_distance_lists(
|
||||
scored_results, query_list_length, f"Collection '{collection_name}'"
|
||||
)
|
||||
|
||||
async def calculate_top_triplet_importances(self, k: int) -> List[Edge]:
|
||||
def score(edge):
|
||||
n1 = edge.node1.attributes.get("vector_distance", 1)
|
||||
n2 = edge.node2.attributes.get("vector_distance", 1)
|
||||
e = edge.attributes.get("vector_distance", 1)
|
||||
return n1 + n2 + e
|
||||
for query_index, scored_results in enumerate(per_query_scored_results):
|
||||
for result in scored_results:
|
||||
node_id = str(getattr(result, "id", None))
|
||||
if not node_id:
|
||||
continue
|
||||
node = self.get_node(node_id)
|
||||
if node is None:
|
||||
continue
|
||||
score = float(getattr(result, "score", self.triplet_distance_penalty))
|
||||
node.update_distance_for_query(
|
||||
query_index=query_index,
|
||||
score=score,
|
||||
query_count=query_count,
|
||||
default_penalty=self.triplet_distance_penalty,
|
||||
)
|
||||
|
||||
async def map_vector_distances_to_graph_edges(
|
||||
self,
|
||||
edge_distances,
|
||||
query_list_length: Optional[int] = None,
|
||||
) -> None:
|
||||
"""Map vector distances to graph edges, supporting single- and multi-query input shapes."""
|
||||
query_count = query_list_length or 1
|
||||
|
||||
self.reset_distances(self.edges, query_count)
|
||||
|
||||
if not edge_distances:
|
||||
return None
|
||||
|
||||
per_query_scored_results = self._normalize_query_distance_lists(
|
||||
edge_distances, query_list_length, "edge_distances"
|
||||
)
|
||||
|
||||
for query_index, scored_results in enumerate(per_query_scored_results):
|
||||
for result in scored_results:
|
||||
payload = getattr(result, "payload", None)
|
||||
if not isinstance(payload, dict):
|
||||
continue
|
||||
text = payload.get("text")
|
||||
if not text:
|
||||
continue
|
||||
matching_edges = self.edges_by_distance_key.get(str(text))
|
||||
if not matching_edges:
|
||||
continue
|
||||
for edge in matching_edges:
|
||||
edge.update_distance_for_query(
|
||||
query_index=query_index,
|
||||
score=float(getattr(result, "score", self.triplet_distance_penalty)),
|
||||
query_count=query_count,
|
||||
default_penalty=self.triplet_distance_penalty,
|
||||
)
|
||||
|
||||
def _calculate_query_top_triplet_importances(
|
||||
self,
|
||||
k: int,
|
||||
query_index: int = 0,
|
||||
) -> List[Edge]:
|
||||
"""Calculate top k triplet importances for a specific query index."""
|
||||
|
||||
def score(edge: Edge) -> float:
|
||||
elements = (
|
||||
(edge.node1, f"node {edge.node1.id}"),
|
||||
(edge.node2, f"node {edge.node2.id}"),
|
||||
(edge, f"edge {edge.node1.id}->{edge.node2.id}"),
|
||||
)
|
||||
|
||||
importances = []
|
||||
for element, label in elements:
|
||||
distances = element.attributes.get("vector_distance")
|
||||
if not isinstance(distances, list) or query_index >= len(distances):
|
||||
raise ValueError(
|
||||
f"{label}: vector_distance must be a list with length > {query_index} "
|
||||
f"before scoring (got {type(distances).__name__} with length "
|
||||
f"{len(distances) if isinstance(distances, list) else 'n/a'})"
|
||||
)
|
||||
value = distances[query_index]
|
||||
try:
|
||||
importances.append(float(value))
|
||||
except (TypeError, ValueError):
|
||||
raise ValueError(
|
||||
f"{label}: vector_distance[{query_index}] must be float-like, "
|
||||
f"got {type(value).__name__}"
|
||||
)
|
||||
|
||||
return sum(importances)
|
||||
|
||||
return heapq.nsmallest(k, self.edges, key=score)
|
||||
|
||||
async def calculate_top_triplet_importances(
|
||||
self, k: int, query_list_length: Optional[int] = None
|
||||
) -> Union[List[Edge], List[List[Edge]]]:
|
||||
"""Calculate top k triplet importances, supporting both single and multi-query modes."""
|
||||
query_count = query_list_length or 1
|
||||
results = [
|
||||
self._calculate_query_top_triplet_importances(k=k, query_index=i)
|
||||
for i in range(query_count)
|
||||
]
|
||||
|
||||
if query_list_length is None:
|
||||
return results[0]
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -30,11 +30,31 @@ class Node:
|
|||
raise InvalidDimensionsError()
|
||||
self.id = node_id
|
||||
self.attributes = attributes if attributes is not None else {}
|
||||
self.attributes["vector_distance"] = node_penalty
|
||||
self.attributes["vector_distance"] = None
|
||||
self.skeleton_neighbours = []
|
||||
self.skeleton_edges = []
|
||||
self.status = np.ones(dimension, dtype=int)
|
||||
|
||||
def reset_vector_distances(self, query_count: int, default_penalty: float) -> None:
    """Re-initialize this node's per-query vector distances to the default penalty."""
    self.attributes["vector_distance"] = [default_penalty for _ in range(query_count)]
|
||||
|
||||
def ensure_vector_distance_list(self, query_count: int, default_penalty: float) -> List[float]:
    """Return the per-query distance list, rebuilding it when missing or mis-sized."""
    current = self.attributes.get("vector_distance")
    if isinstance(current, list) and len(current) == query_count:
        return current
    # Missing, wrong type, or wrong length: start over with the default penalty.
    rebuilt = [default_penalty] * query_count
    self.attributes["vector_distance"] = rebuilt
    return rebuilt
|
||||
|
||||
def update_distance_for_query(
    self,
    query_index: int,
    score: float,
    query_count: int,
    default_penalty: float,
) -> None:
    """Record `score` for one query index, normalizing the distance list first."""
    self.ensure_vector_distance_list(query_count, default_penalty)[query_index] = score
|
||||
|
||||
def add_skeleton_neighbor(self, neighbor: "Node") -> None:
    """Append `neighbor` to skeleton_neighbours unless it is already present."""
    already_present = neighbor in self.skeleton_neighbours
    if not already_present:
        self.skeleton_neighbours.append(neighbor)
|
||||
|
|
@ -116,10 +136,36 @@ class Edge:
|
|||
self.node1 = node1
|
||||
self.node2 = node2
|
||||
self.attributes = attributes if attributes is not None else {}
|
||||
self.attributes["vector_distance"] = edge_penalty
|
||||
self.attributes["vector_distance"] = None
|
||||
self.directed = directed
|
||||
self.status = np.ones(dimension, dtype=int)
|
||||
|
||||
def get_distance_key(self) -> Optional[str]:
    """Return a text key for this edge: edge_text, else relationship_type, else None."""
    attrs = self.attributes
    # `or` intentionally skips falsy edge_text (e.g. "") to fall back.
    key = attrs.get("edge_text") or attrs.get("relationship_type")
    return None if key is None else str(key)
|
||||
|
||||
def reset_vector_distances(self, query_count: int, default_penalty: float) -> None:
    """Re-initialize this edge's per-query vector distances to the default penalty."""
    self.attributes["vector_distance"] = [default_penalty for _ in range(query_count)]
|
||||
|
||||
def ensure_vector_distance_list(self, query_count: int, default_penalty: float) -> List[float]:
    """Return this edge's per-query distance list, rebuilding it when missing or mis-sized."""
    existing = self.attributes.get("vector_distance")
    well_formed = isinstance(existing, list) and len(existing) == query_count
    if well_formed:
        return existing
    fresh = [default_penalty] * query_count
    self.attributes["vector_distance"] = fresh
    return fresh
|
||||
|
||||
def update_distance_for_query(
    self,
    query_index: int,
    score: float,
    query_count: int,
    default_penalty: float,
) -> None:
    """Store `score` at `query_index` after ensuring the distance list is well-formed."""
    per_query = self.ensure_vector_distance_list(query_count, default_penalty)
    per_query[query_index] = score
|
||||
|
||||
def is_edge_alive_in_dimension(self, dimension: int) -> bool:
|
||||
if dimension < 0 or dimension >= len(self.status):
|
||||
raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
|
||||
|
|
|
|||
|
|
@ -5,3 +5,4 @@ from .retrieve_existing_edges import retrieve_existing_edges
|
|||
from .convert_node_to_data_point import convert_node_to_data_point
|
||||
from .deduplicate_nodes_and_edges import deduplicate_nodes_and_edges
|
||||
from .resolve_edges_to_text import resolve_edges_to_text
|
||||
from .get_entity_nodes_from_triplets import get_entity_nodes_from_triplets
|
||||
|
|
|
|||
12
cognee/modules/graph/utils/get_entity_nodes_from_triplets.py
Normal file
12
cognee/modules/graph/utils/get_entity_nodes_from_triplets.py
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
def get_entity_nodes_from_triplets(triplets):
    """Collect unique endpoint nodes from triplets as [{"id": str(node.id)}, ...].

    Order of first appearance is preserved; absent or falsy endpoints are skipped.
    """
    entity_nodes = []
    seen_ids = set()

    def _collect(node):
        # Skip missing/falsy endpoints and nodes already emitted.
        if node and node.id not in seen_ids:
            seen_ids.add(node.id)
            entity_nodes.append({"id": str(node.id)})

    for triplet in triplets:
        _collect(getattr(triplet, "node1", None))
        _collect(getattr(triplet, "node2", None))

    return entity_nodes
|
||||
|
|
@ -3,3 +3,4 @@ from .get_notebooks import get_notebooks
|
|||
from .create_notebook import create_notebook
|
||||
from .update_notebook import update_notebook
|
||||
from .delete_notebook import delete_notebook
|
||||
from .create_tutorial_notebooks import create_tutorial_notebooks
|
||||
|
|
|
|||
|
|
@ -6,40 +6,6 @@ from cognee.infrastructure.databases.relational import with_async_session
|
|||
|
||||
from ..models.Notebook import Notebook, NotebookCell
|
||||
|
||||
TUTORIAL_NOTEBOOK_NAME = "Python Development with Cognee Tutorial 🧠"
|
||||
|
||||
|
||||
async def _create_tutorial_notebook(
    user_id: UUID, session: AsyncSession, force_refresh: bool = False
) -> None:
    """
    Create the default tutorial notebook for new users.

    Dynamically fetches from: https://github.com/topoteretes/cognee/blob/notebook_tutorial/notebooks/starter_tutorial.zip

    Args:
        user_id: Owner of the new notebook.
        session: Async DB session; the notebook is added and committed on it.
        force_refresh: Passed through as `force` to `from_ipynb_zip_url`;
            presumably re-fetches the zip even if cached — TODO confirm.

    Raises:
        Exception: Re-raises any failure after printing it (network fetch,
            zip parsing, or DB commit).
    """
    TUTORIAL_ZIP_URL = (
        "https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip"
    )

    try:
        # Create notebook from remote zip file (includes notebook + data files)
        notebook = await Notebook.from_ipynb_zip_url(
            zip_url=TUTORIAL_ZIP_URL,
            owner_id=user_id,
            notebook_filename="tutorial.ipynb",
            name=TUTORIAL_NOTEBOOK_NAME,
            deletable=False,  # tutorial notebooks are protected from deletion
            force=force_refresh,
        )

        # Add to session and commit
        session.add(notebook)
        await session.commit()

    except Exception as e:
        # NOTE(review): uses print rather than the module logger — consider logging.
        print(f"Failed to fetch tutorial notebook from {TUTORIAL_ZIP_URL}: {e}")

        raise e
|
||||
|
||||
|
||||
@with_async_session
|
||||
async def create_notebook(
|
||||
|
|
|
|||
191
cognee/modules/notebooks/methods/create_tutorial_notebooks.py
Normal file
191
cognee/modules/notebooks/methods/create_tutorial_notebooks.py
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
from pathlib import Path
|
||||
from uuid import NAMESPACE_OID, UUID, uuid5, uuid4
|
||||
from typing import List, Optional, Dict, Any
|
||||
import re
|
||||
import json
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.root_dir import ROOT_DIR
|
||||
|
||||
from ..models.Notebook import Notebook, NotebookCell
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def _get_tutorials_directory() -> Path:
    """Get the path to the tutorials directory."""
    tutorials_path = ROOT_DIR / "modules" / "notebooks" / "tutorials"
    return tutorials_path
|
||||
|
||||
|
||||
def _parse_cell_index(filename: str) -> int:
|
||||
"""Extract cell index from filename like 'cell-0.md' or 'cell-123.py'."""
|
||||
match = re.search(r"cell-(\d+)", filename)
|
||||
if match:
|
||||
return int(match.group(1))
|
||||
return -1
|
||||
|
||||
|
||||
def _get_cell_type(file_path: Path) -> str:
|
||||
"""Determine cell type from file extension."""
|
||||
extension = file_path.suffix.lower()
|
||||
if extension == ".md":
|
||||
return "markdown"
|
||||
elif extension == ".py":
|
||||
return "code"
|
||||
else:
|
||||
raise ValueError(f"Unsupported cell file type: {extension}")
|
||||
|
||||
|
||||
def _extract_markdown_heading(content: str) -> str | None:
|
||||
"""Extract the first markdown heading from content."""
|
||||
for line in content.splitlines():
|
||||
line = line.strip()
|
||||
# Match lines starting with one or more # followed by space and text
|
||||
match = re.match(r"^#+\s+(.+)$", line)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
return None
|
||||
|
||||
|
||||
def _get_cell_name(cell_file: Path, cell_type: str, content: str) -> str:
|
||||
"""Get the appropriate name for a cell."""
|
||||
if cell_type == "code":
|
||||
return "Code Cell"
|
||||
elif cell_type == "markdown":
|
||||
heading = _extract_markdown_heading(content)
|
||||
if heading:
|
||||
return heading
|
||||
# Fallback to filename stem
|
||||
return cell_file.stem
|
||||
|
||||
|
||||
def _load_tutorial_cells(tutorial_dir: Path) -> List[NotebookCell]:
    """Build NotebookCell objects from 'cell-*' .md/.py files in `tutorial_dir`, ordered by index.

    Files that fail to load are logged and skipped rather than aborting the tutorial.
    """
    candidates = sorted(
        (
            entry
            for entry in tutorial_dir.iterdir()
            if entry.is_file()
            and entry.name.startswith("cell-")
            and entry.suffix in (".md", ".py")
        ),
        key=lambda entry: _parse_cell_index(entry.name),
    )

    loaded_cells: List[NotebookCell] = []
    for cell_file in candidates:
        try:
            cell_type = _get_cell_type(cell_file)
            content = cell_file.read_text(encoding="utf-8")
            cell = NotebookCell(
                id=uuid4(),
                type=cell_type,
                name=_get_cell_name(cell_file, cell_type, content),
                content=content,
            )
        except Exception as e:
            logger.warning(f"Failed to load cell {cell_file}: {e}")
            continue
        loaded_cells.append(cell)

    return loaded_cells
|
||||
|
||||
|
||||
def _read_tutorial_config(tutorial_dir: Path) -> Optional[Dict[str, Any]]:
|
||||
"""Read config.json from a tutorial directory if it exists."""
|
||||
config_path = tutorial_dir / "config.json"
|
||||
if config_path.exists():
|
||||
try:
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
except (json.JSONDecodeError, IOError) as e:
|
||||
logger.warning(f"Failed to read config.json from {tutorial_dir}: {e}")
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _format_tutorial_name(tutorial_dir_name: str) -> str:
|
||||
"""Format tutorial directory name into a readable notebook name (fallback)."""
|
||||
|
||||
name = tutorial_dir_name.replace("-", " ").replace("_", " ")
|
||||
return f"{name.capitalize()} - tutorial 🧠"
|
||||
|
||||
|
||||
async def create_tutorial_notebooks(user_id: UUID, session: AsyncSession) -> None:
    """
    Create tutorial notebooks for all tutorials found in the tutorials directory.
    Each tutorial directory will become a separate notebook.

    Args:
        user_id: Owner assigned to every created notebook.
        session: Async DB session; notebooks are added and committed on it.

    Notes:
        Best-effort: per-tutorial failures are logged and skipped, and any
        top-level failure is logged without raising.
    """
    try:
        tutorials_dir = _get_tutorials_directory()

        if not tutorials_dir.exists():
            logger.warning(f"Tutorials directory not found: {tutorials_dir}")
            return

        # Hidden directories (".git", ".DS_Store"-style) are not tutorials.
        tutorial_dirs = [
            d for d in tutorials_dir.iterdir() if d.is_dir() and not d.name.startswith(".")
        ]

        if not tutorial_dirs:
            logger.warning(f"No tutorial directories found in {tutorials_dir}")
            return

        notebooks_to_add = []

        for tutorial_dir in tutorial_dirs:
            try:
                cells = _load_tutorial_cells(tutorial_dir)

                if not cells:
                    logger.warning(f"No cells found in tutorial directory: {tutorial_dir}")
                    continue

                config = _read_tutorial_config(tutorial_dir)

                # Use name from config.json, or fallback to formatted directory name
                if config and "name" in config:
                    notebook_name = config["name"]
                else:
                    notebook_name = _format_tutorial_name(tutorial_dir.name)
                    logger.warning(
                        f"No config.json or 'name' field found in {tutorial_dir}, "
                        f"using fallback name: {notebook_name}"
                    )

                # Use deletable flag from config.json, or default to False for tutorials
                deletable = False
                if config and "deletable" in config:
                    deletable = bool(config["deletable"])

                # uuid5 is deterministic for a given name, so re-running this
                # produces the same notebook id instead of a new row per run.
                notebook_id = uuid5(NAMESPACE_OID, name=notebook_name)

                notebook = Notebook(
                    id=notebook_id,
                    owner_id=user_id,
                    name=notebook_name,
                    cells=cells,
                    deletable=deletable,
                )

                notebooks_to_add.append(notebook)
                logger.info(f"Created tutorial notebook: {notebook_name} with {len(cells)} cells")

            except Exception as e:
                # One broken tutorial must not block the others.
                logger.error(f"Failed to create tutorial notebook from {tutorial_dir}: {e}")
                continue

        if not notebooks_to_add:
            return

        # Single commit for all notebooks created in this pass.
        for notebook in notebooks_to_add:
            session.add(notebook)

        await session.commit()

    except Exception as e:
        logger.error(f"Failed to create tutorial notebooks for user {user_id}: {e}")
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from uuid import UUID
|
||||
from uuid import NAMESPACE_OID, UUID, uuid5
|
||||
from typing import List
|
||||
from sqlalchemy import select, and_
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
|
@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||
from cognee.infrastructure.databases.relational import with_async_session
|
||||
|
||||
from ..models.Notebook import Notebook
|
||||
from .create_notebook import _create_tutorial_notebook, TUTORIAL_NOTEBOOK_NAME
|
||||
from .create_tutorial_notebooks import create_tutorial_notebooks
|
||||
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
|
|
@ -19,21 +19,25 @@ async def get_notebooks(
|
|||
session: AsyncSession,
|
||||
) -> List[Notebook]:
|
||||
# Check if tutorial notebook already exists for this user
|
||||
tutorial_notebook_ids = [
|
||||
uuid5(NAMESPACE_OID, name="Cognee Basics - tutorial 🧠"),
|
||||
uuid5(NAMESPACE_OID, name="Python Development with Cognee - tutorial 🧠"),
|
||||
]
|
||||
tutorial_query = select(Notebook).where(
|
||||
and_(
|
||||
Notebook.owner_id == user_id,
|
||||
Notebook.name == TUTORIAL_NOTEBOOK_NAME,
|
||||
Notebook.id.in_(tutorial_notebook_ids),
|
||||
~Notebook.deletable,
|
||||
)
|
||||
)
|
||||
tutorial_result = await session.execute(tutorial_query)
|
||||
tutorial_notebook = tutorial_result.scalar_one_or_none()
|
||||
tutorial_notebooks = tutorial_result.scalars().all()
|
||||
|
||||
# If tutorial notebook doesn't exist, create it
|
||||
if tutorial_notebook is None:
|
||||
logger.info(f"Tutorial notebook not found for user {user_id}, creating it")
|
||||
# If tutorial notebooks don't exist, create them
|
||||
if len(tutorial_notebooks) == 0:
|
||||
logger.info(f"Tutorial notebooks not found for user {user_id}, creating them")
|
||||
try:
|
||||
await _create_tutorial_notebook(user_id, session, force_refresh=False)
|
||||
await create_tutorial_notebooks(user_id, session)
|
||||
except Exception as e:
|
||||
# Log the error but continue to return existing notebooks
|
||||
logger.error(f"Failed to create tutorial notebook for user {user_id}: {e}")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,3 @@
|
|||
# Use Cognee to Build Your Own Knowledge Graph
|
||||
|
||||
Cognee is a tool that allows you to build your own knowledge graph from the data you have.
|
||||
10
cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md
Normal file
10
cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
# What You'll Learn in This Tutorial
|
||||
|
||||
In this tutorial, you'll learn how to use Cognee to transform scattered data into an intelligent knowledge system that enhances your workflow.
|
||||
By the end, you'll have:
|
||||
|
||||
- Connected disparate data sources into a unified AI memory graph
|
||||
- Built a memory layer that infers knowledge from provided data
|
||||
- Learned how to use search capabilities that combine the diverse context
|
||||
|
||||
This tutorial demonstrates the power of knowledge graphs and retrieval-augmented generation (RAG), showing you how to build systems that learn from data and infer knowledge.
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
# Cognee and Its Core Operations
|
||||
|
||||
Before we dive in, let's understand the core Cognee operations we'll be working with:
|
||||
|
||||
- `cognee.add()` - Ingests raw data into the system
|
||||
- `cognee.cognify()` - Processes and structures data into a knowledge graph using AI
|
||||
- `cognee.search()` - Queries the knowledge graph with natural language
|
||||
28
cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py
Normal file
28
cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# Add data one by one, or pass a list to add multiple items at once
|
||||
|
||||
await cognee.add(
|
||||
"Harry Potter is a student at Hogwarts and belongs to Gryffindor house. \
|
||||
He is known for defeating Voldemort and his Patronus is a stag.",
|
||||
dataset_name="cognee-basics",
|
||||
)
|
||||
|
||||
await cognee.add(
|
||||
"Hermione Granger is a student at Hogwarts and also belongs to Gryffindor house. \
|
||||
She is known for her intelligence and deep knowledge of spells. Her Patronus is an otter.",
|
||||
dataset_name="cognee-basics",
|
||||
)
|
||||
|
||||
await cognee.add(
|
||||
"Severus Snape is a professor at Hogwarts who teaches Potions. \
|
||||
He belongs to Slytherin house and was secretly loyal to Albus Dumbledore.",
|
||||
dataset_name="cognee-basics",
|
||||
)
|
||||
|
||||
await cognee.add(
|
||||
[
|
||||
"Hogwarts is a magical school located in Scotland. During Harry Potter's time at school, the headmaster was Albus Dumbledore.",
|
||||
"A Horcrux is a dark magic object used to store a fragment of a wizard's soul. Voldemort created multiple Horcruxes to achieve immortality.",
|
||||
"The Elder Wand is a powerful wand believed to be unbeatable. Its final known owner was Harry Potter.",
|
||||
],
|
||||
dataset_name="cognee-basics",
|
||||
)
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Run cognify to process the data and create a knowledge graph
|
||||
|
||||
await cognee.cognify(datasets=["cognee-basics"])
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
# And after the cognification, search the knowledge graph
|
||||
|
||||
result = await cognee.search(
|
||||
"Which characters belong to Gryffindor?",
|
||||
datasets=["cognee-basics"],
|
||||
)
|
||||
|
||||
# Print the result so you can see it in the notebook output
|
||||
print(result)
|
||||
17
cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py
Normal file
17
cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Run multiple searches and print the results
|
||||
|
||||
result_1 = await cognee.search(
|
||||
"Who taught Potions at Hogwarts at time Albus Dumbledore was the headmaster?",
|
||||
datasets=["cognee-basics"],
|
||||
)
|
||||
|
||||
# Print the result so you can see it in the notebook output
|
||||
print(result_1)
|
||||
|
||||
|
||||
result_2 = await cognee.search(
|
||||
"How to defeat Voldemort?",
|
||||
datasets=["cognee-basics"],
|
||||
)
|
||||
|
||||
print(result_2)
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"name": "Cognee Basics - tutorial 🧠",
|
||||
"deletable": false
|
||||
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Using Cognee with Python Development Data
|
||||
|
||||
Unite authoritative Python practice (Guido van Rossum's own contributions!), normative guidance (Zen/PEP 8), and your lived context (rules + conversations) into one AI memory that produces answers that are relevant, explainable, and consistent.
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
You'll see that cognee has connected your Python development challenges with Guido's approaches, revealing patterns like:
|
||||
- "Type hint implementation failed due to circular imports - similar to issue Guido solved in mypy PR #1234"
|
||||
- "Performance bottleneck in list comprehension matches pattern Guido optimized in CPython commit abc123"
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Memify
|
||||
|
||||
Let's now introduce the memory functions. These algorithms run on top of your semantic layer, connecting the dots and improving the reasoning in search.
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
memify_result = await cognee.memify(dataset="python-development-with-cognee")
|
||||
|
||||
print(memify_result)
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
# What Memify Does in This Case
|
||||
|
||||
This advanced function uses AI to:
|
||||
- Infer rule patterns from your code (e.g., "When implementing iterators, always follow the protocol Guido established")
|
||||
- Connect design philosophy to practice (e.g., linking "explicit is better than implicit" to your type hinting decisions)
|
||||
|
||||
Now let's see how the system has connected your Python development patterns with established best practices:
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
# Search for connections between your async patterns and Python philosophy
|
||||
results = await cognee.search(
|
||||
query_text="How does my AsyncWebScraper implementation align with Python's design principles?",
|
||||
datasets=["python-development-with-cognee"],
|
||||
)
|
||||
print(results)
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Temporal Graphs
|
||||
|
||||
As we used `temporal_cognify` option for each cognification, we can ask time related questions, for example:
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
result = await cognee.search(
|
||||
query_text="What can we learn from Guido's contributions in 2025?",
|
||||
query_type=cognee.SearchType.TEMPORAL,
|
||||
datasets=["python-development-with-cognee"],
|
||||
)
|
||||
|
||||
print(result)
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
# What You'll Learn in This Tutorial
|
||||
|
||||
In this comprehensive tutorial, you'll discover how to transform scattered development data into an intelligent knowledge system that enhances your coding workflow. By the end, you'll have:
|
||||
- Connected disparate data sources (Guido's CPython contributions, mypy development, PEP discussions, your Python projects) into a unified AI memory graph
|
||||
- Built a memory layer that understands Python design philosophy, best practice coding patterns, and your preferences and experience
|
||||
- Learned how to use intelligent search capabilities that combine the diverse context
|
||||
- Integrated everything with your coding environment through MCP (Model Context Protocol)
|
||||
|
||||
This tutorial demonstrates the power of knowledge graphs and retrieval-augmented generation (RAG) for software development, showing you how to build systems that learn from Python's creator and improve your own Python development.
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
# Cognee and Its Core Operations
|
||||
|
||||
Before we dive in, let's understand the core Cognee operations we'll be working with:
|
||||
- `cognee.add()` - Ingests raw data (files, text, APIs) into the system
|
||||
- `cognee.cognify()` - Processes and structures data into a knowledge graph using AI
|
||||
- `cognee.search()` - Queries the knowledge graph with natural language or Cypher
|
||||
- `cognee.memify()` - Cognee's "secret sauce" that infers implicit connections and rules from your data
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
# Data Used in This Tutorial
|
||||
|
||||
Cognee can ingest many types of sources. In this tutorial, we use a small, concrete set of files that cover different perspectives:
|
||||
|
||||
- `guido_contributions.json` — Authoritative exemplars. Real PRs and commits from Guido van Rossum (mypy, CPython). These show how Python’s creator solved problems and provide concrete anchors for patterns.
|
||||
- `pep_style_guide.md` — Norms. Encodes community style and typing conventions (PEP 8 and related). Ensures that search results and inferred rules align with widely accepted standards.
|
||||
- `zen_principles.md` — Philosophy. The Zen of Python. Grounds design trade‑offs (simplicity, explicitness, readability) beyond syntax or mechanics.
|
||||
- `my_developer_rules.md` — Local constraints. Your house rules, conventions, and project‑specific requirements (scope, privacy, Spec.md). Keeps recommendations relevant to your actual workflow.
|
||||
- `copilot_conversations.json` — Personal history. Transcripts of real assistant conversations, including your questions, code snippets, and discussion topics. Captures “how you code” and connects it to “how Guido codes.”
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
# First Data Ingestion: Exploring Guido's Python Contributions
|
||||
|
||||
We'll begin with a document that contains detailed PRs and commits from Guido van Rossum's work on mypy and CPython, showing real-world examples of Python's creator solving type system and language design challenges.
|
||||
|
||||
We'll use Cognee's `add()` and `cognify()` functions to ingest this data and build a knowledge graph that connects Guido's development patterns with Python best practices.
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
# Add Guido's contributions to data
|
||||
await cognee.add(
|
||||
"d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/guido_contributions.json",
|
||||
node_set=["guido_data"],
|
||||
dataset_name="python-development-with-cognee",
|
||||
)
|
||||
|
||||
# Cognify added data into a knowledge graph
|
||||
await cognee.cognify(datasets=["python-development-with-cognee"], temporal_cognify=True)
|
||||
|
||||
# Search the knowledge graph
|
||||
results = await cognee.search("Show me commits", datasets=["python-development-with-cognee"])
|
||||
print(results)
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# What Just Happened?
|
||||
|
||||
The `search()` function uses natural language to query a knowledge graph containing Guido's development history. Unlike traditional databases, Cognee understands the relationships between commits, language features, design decisions, and evolution over time.
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Ingesting More Data
|
||||
|
||||
Now we'll add the remaining data and see how the connections emerge between Guido's contributions, Python best practices, and user conversations.
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
await cognee.add(
|
||||
"d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/copilot_conversations.json",
|
||||
node_set=["developer_data"],
|
||||
dataset_name="python-development-with-cognee",
|
||||
)
|
||||
|
||||
await cognee.add(
|
||||
"d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/my_developer_rules.md",
|
||||
node_set=["developer_data"],
|
||||
dataset_name="python-development-with-cognee",
|
||||
)
|
||||
|
||||
await cognee.add(
|
||||
"d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/zen_principles.md",
|
||||
node_set=["principles_data"],
|
||||
dataset_name="python-development-with-cognee",
|
||||
)
|
||||
|
||||
await cognee.add(
|
||||
"d18g11dwdlgfey.cloudfront.net/tutorials/python-development-with-cognee/data/pep_style_guide.md",
|
||||
node_set=["principles_data"],
|
||||
dataset_name="python-development-with-cognee",
|
||||
)
|
||||
|
||||
await cognee.cognify(datasets=["python-development-with-cognee"], temporal_cognify=True)
|
||||
|
||||
results = await cognee.search(
|
||||
"What Python type hinting challenges did I face, and how does Guido approach similar problems in mypy?",
|
||||
datasets=["python-development-with-cognee"],
|
||||
use_combined_context=True, # Used to show reasoning graph visualization
|
||||
)
|
||||
print(results)
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"name": "Python Development with Cognee - tutorial 🧠",
|
||||
"deletable": false
|
||||
}
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
{
|
||||
"conversations": [
|
||||
{
|
||||
"id": "conv_001",
|
||||
"timestamp": "2024-01-15T10:30:00Z",
|
||||
"topic": "async/await patterns",
|
||||
"user_query": "I'm building a web scraper that needs to handle thousands of URLs concurrently. What's the best way to structure this with asyncio?",
|
||||
"assistant_response": "For high-concurrency web scraping, you'll want to use asyncio with aiohttp and implement proper semaphore-based rate limiting. Here's a robust pattern:\n\n```python\nimport asyncio\nimport aiohttp\nfrom typing import List, Dict, Any\n\nclass AsyncWebScraper:\n def __init__(self, max_concurrent: int = 100):\n self.semaphore = asyncio.Semaphore(max_concurrent)\n self.session = None\n \n async def __aenter__(self):\n self.session = aiohttp.ClientSession(\n timeout=aiohttp.ClientTimeout(total=30),\n connector=aiohttp.TCPConnector(limit=100)\n )\n return self\n \n async def __aexit__(self, exc_type, exc_val, exc_tb):\n if self.session:\n await self.session.close()\n \n async def fetch_url(self, url: str) -> Dict[str, Any]:\n async with self.semaphore:\n try:\n async with self.session.get(url) as response:\n content = await response.text()\n return {\n 'url': url,\n 'status': response.status,\n 'content': content,\n 'headers': dict(response.headers)\n }\n except Exception as e:\n return {'url': url, 'error': str(e)}\n \n async def scrape_urls(self, urls: List[str]) -> List[Dict[str, Any]]:\n tasks = [self.fetch_url(url) for url in urls]\n return await asyncio.gather(*tasks, return_exceptions=True)\n\n# Usage\nasync def main():\n urls = ['http://example.com'] * 1000 # Your URLs here\n \n async with AsyncWebScraper(max_concurrent=50) as scraper:\n results = await scraper.scrape_urls(urls)\n \n successful = [r for r in results if 'error' not in r]\n print(f'Successfully scraped {len(successful)} URLs')\n\nif __name__ == '__main__':\n asyncio.run(main())\n```\n\nKey points:\n- Semaphore controls concurrency to avoid overwhelming servers\n- Context manager ensures proper cleanup\n- TCPConnector with connection pooling\n- Proper exception handling for failed requests",
|
||||
"code_context": {
|
||||
"file": "scraper.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["async/await", "context_managers", "semaphores", "aiohttp", "error_handling"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I add retry logic for failed requests?",
|
||||
"What's the best way to parse the scraped HTML content?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "conv_002",
|
||||
"timestamp": "2024-01-16T14:20:00Z",
|
||||
"topic": "dataclass vs pydantic",
|
||||
"user_query": "When should I use dataclasses vs Pydantic models? I'm building an API and need to handle user input validation.",
|
||||
"assistant_response": "Great question! For API development with input validation, here's when to use each:\n\n**Use Pydantic when:**\n- Building APIs (FastAPI, Django REST)\n- Need runtime validation of external data\n- Want automatic JSON serialization/deserialization\n- Need complex validation rules\n- Working with configuration files\n\n**Use dataclasses when:**\n- Internal data structures\n- Simple data containers\n- Want to stay in stdlib\n- Performance is critical (dataclasses are faster)\n\nFor your API, I'd recommend Pydantic:\n\n```python\nfrom pydantic import BaseModel, validator, Field\nfrom typing import Optional, List\nfrom datetime import datetime\nfrom enum import Enum\n\nclass UserRole(str, Enum):\n ADMIN = 'admin'\n USER = 'user'\n GUEST = 'guest'\n\nclass CreateUserRequest(BaseModel):\n username: str = Field(..., min_length=3, max_length=50, regex=r'^[a-zA-Z0-9_]+$')\n email: str = Field(..., regex=r'^[\\w\\.-]+@[\\w\\.-]+\\.\\w+$')\n password: str = Field(..., min_length=8)\n role: UserRole = UserRole.USER\n tags: Optional[List[str]] = None\n \n @validator('password')\n def validate_password(cls, v):\n if not any(c.isupper() for c in v):\n raise ValueError('Password must contain uppercase letter')\n if not any(c.isdigit() for c in v):\n raise ValueError('Password must contain digit')\n return v\n \n @validator('tags')\n def validate_tags(cls, v):\n if v and len(v) > 10:\n raise ValueError('Too many tags')\n return v\n\nclass User(BaseModel):\n id: int\n username: str\n email: str\n role: UserRole\n created_at: datetime\n is_active: bool = True\n \n class Config:\n orm_mode = True\n json_encoders = { datetime: lambda v: v.isoformat() }\n\n# FastAPI usage\nfrom fastapi import FastAPI, HTTPException\n\napp = FastAPI()\n\n@app.post('/users/', response_model=User)\nasync def create_user(user_data: CreateUserRequest):\n try:\n new_user = await create_user_in_db(user_data)\n return User.from_orm(new_user)\n except Exception as e:\n raise 
HTTPException(status_code=400, detail=str(e))\n```\n\nPydantic gives you:\n- Automatic request validation\n- Clear error messages\n- Type coercion\n- JSON Schema generation\n- IDE support with type hints",
|
||||
"code_context": {
|
||||
"file": "models.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["pydantic", "dataclasses", "validation", "fastapi", "type_hints", "enums"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I handle nested validation with Pydantic?",
|
||||
"Can I use Pydantic with SQLAlchemy models?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "conv_003",
|
||||
"timestamp": "2024-01-17T09:15:00Z",
|
||||
"topic": "testing patterns",
|
||||
"user_query": "I'm struggling with testing async code and database interactions. What's the best approach for pytest with async functions?",
|
||||
"assistant_response": "Testing async code can be tricky! Here's a comprehensive approach using pytest-asyncio and proper test isolation...",
|
||||
"code_context": {
|
||||
"file": "test_users.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["pytest", "async_testing", "fixtures", "mocking", "database_testing", "fastapi_testing"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I test WebSocket connections?",
|
||||
"What's the best way to test database migrations?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "conv_004",
|
||||
"timestamp": "2024-01-18T16:45:00Z",
|
||||
"topic": "performance optimization",
|
||||
"user_query": "My FastAPI app is getting slow with large datasets. How can I optimize database queries and response times?",
|
||||
"assistant_response": "Performance optimization in FastAPI apps typically involves several layers...",
|
||||
"code_context": {
|
||||
"file": "optimizations.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["performance_optimization", "caching", "database_optimization", "async_patterns", "monitoring"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I implement database connection pooling properly?",
|
||||
"What's the best way to handle memory usage with large datasets?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "conv_005",
|
||||
"timestamp": "2024-01-19T11:30:00Z",
|
||||
"topic": "error handling and logging",
|
||||
"user_query": "I need to implement proper error handling and logging across my Python application. What's the best approach for production-ready error management?",
|
||||
"assistant_response": "Excellent question! Proper error handling and logging are crucial for production applications...",
|
||||
"code_context": {
|
||||
"file": "error_handling.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["error_handling", "logging", "exceptions", "middleware", "decorators", "fastapi"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I integrate this with external monitoring tools like Sentry?",
|
||||
"What's the best way to handle errors in background tasks?"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"total_conversations": 5,
|
||||
"date_range": "2024-01-15 to 2024-01-19",
|
||||
"topics_covered": [
|
||||
"async/await patterns",
|
||||
"dataclass vs pydantic",
|
||||
"testing patterns",
|
||||
"performance optimization",
|
||||
"error handling and logging"
|
||||
],
|
||||
"code_patterns_discussed": [
|
||||
"asyncio", "aiohttp", "semaphores", "context_managers",
|
||||
"pydantic", "fastapi", "type_hints", "validation",
|
||||
"pytest", "async_testing", "fixtures", "mocking",
|
||||
"performance_optimization", "caching", "database_optimization",
|
||||
"error_handling", "logging", "exceptions", "middleware"
|
||||
],
|
||||
"difficulty_levels": {
|
||||
"beginner": 1,
|
||||
"intermediate": 2,
|
||||
"advanced": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,976 @@
|
|||
[
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "Re-work indirect dependencies",
|
||||
"description": "Wow, this was quite a ride. Indirect dependencies were always supported kind of on best effort. This PR puts them on some principled foundation. It fixes three crashes and three stale types reported. All tests are quite weird/obscure, they are designed to expose the flaws in current logic (plus one test that passes on master, but it covers important corner case, so I add it just in case ). A short summary of various fixes (in arbitrary order):\r\n* Update many outdated comments and docstrings\r\n* Missing transitive dependency is now considered stale\r\n* Handle transitive generic bases in indirection visitor\r\n* Handle chained alias targets in indirection visitor\r\n* Always record original aliases during semantic analysis\r\n* Delete `qualified_tvars` as a concept, they are not needed since long ago\r\n* Remove ad-hoc handling for `TypeInfo`s from `build.py`\r\n* Support symbols with setter type different from getter type\r\n\r\nIn general the logic should be more simple/straightforward now:\r\n* Get all types in a file (need both symbol types _and_ expression types since some types may be only local)\r\n* For each type _transitively_ find all named types in them (thus aggregating all interfaces the type depends on)\r\n* In case any type was forced using `get_proper_type()`, record the orginal type alias during semantic analysis\r\n\r\nNote since this makes the algorithm correct, it may also make it slower (most notably because we must visit generic bases). I tried to offset this by couple optimizations, hopefully performance impact will be minimal.",
|
||||
"url": "https://github.com/python/mypy/pull/19798",
|
||||
"date": "2025-09-05T13:54:52Z",
|
||||
"sha_or_number": "19798",
|
||||
"files_changed": [
|
||||
"mypy/build.py",
|
||||
"mypy/fixup.py",
|
||||
"mypy/indirection.py",
|
||||
"mypy/nodes.py",
|
||||
"mypy/semanal.py",
|
||||
"mypy/server/deps.py",
|
||||
"mypy/test/typefixture.py",
|
||||
"mypy/typeanal.py",
|
||||
"test-data/unit/check-incremental.test"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": "from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort\nfrom mypy.indirection import TypeIndirectionVisitor\nfrom mypy.messages import MessageBuilder\nfrom mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable, TypeInfo\nfrom mypy.partially_defined import PossiblyUndefinedVariableVisitor\nfrom mypy.semanal import SemanticAnalyzer\nfrom mypy.semanal_pass1 import SemanticAnalyzerPreAnalysis",
|
||||
"after_code": "from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort\nfrom mypy.indirection import TypeIndirectionVisitor\nfrom mypy.messages import MessageBuilder\nfrom mypy.nodes import (\n Decorator,\n Import,\n ImportAll,\n ImportBase,\n ImportFrom,\n MypyFile,\n OverloadedFuncDef,\n SymbolTable,\n)\nfrom mypy.partially_defined import PossiblyUndefinedVariableVisitor\nfrom mypy.semanal import SemanticAnalyzer\nfrom mypy.semanal_pass1 import SemanticAnalyzerPreAnalysis",
|
||||
"diff_context": "from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort\nfrom mypy.indirection import TypeIndirectionVisitor\nfrom mypy.messages import MessageBuilder\nfrom mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable, TypeInfo\nfrom mypy.nodes import (\n Decorator,\n Import,\n ImportAll,\n ImportBase,\n ImportFrom,\n MypyFile,\n OverloadedFuncDef,\n SymbolTable,\n)\nfrom mypy.partially_defined import PossiblyUndefinedVariableVisitor\nfrom mypy.semanal import SemanticAnalyzer\nfrom mypy.semanal_pass1 import SemanticAnalyzerPreAnalysis",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": "\nFor single nodes, processing is simple. If the node was cached, we\ndeserialize the cache data and fix up cross-references. Otherwise, we\ndo semantic analysis followed by type checking. We also handle (c)\nabove; if a module has valid cache data *but* any of its\ndependencies was processed from source, then the module should be\nprocessed from source.\n\nA relatively simple optimization (outside SCCs) we might do in the\nfuture is as follows: if a node's cache data is valid, but one or more\nof its dependencies are out of date so we have to re-parse the node\nfrom source, once we have fully type-checked the node, we can decide\nwhether its symbol table actually changed compared to the cache data\n(by reading the cache data and comparing it to the data we would be\nwriting). If there is no change we can declare the node up to date,\nand any node that depends (and for which we have cached data, and\nwhose other dependencies are up to date) on it won't need to be\nre-parsed from source.\n\nImport cycles\n-------------\n\nFinally we have to decide how to handle (c), import cycles. Here\nwe'll need a modified version of the original state machine\n(build.py), but we only need to do this per SCC, and we won't have to\ndeal with changes to the list of nodes while we're processing it.",
|
||||
"after_code": "\nFor single nodes, processing is simple. If the node was cached, we\ndeserialize the cache data and fix up cross-references. Otherwise, we\ndo semantic analysis followed by type checking. Once we (re-)processed\nan SCC we check whether its interface (symbol table) is still fresh\n(matches previous cached value). If it is not, we consider dependent SCCs\nstale so that they need to be re-parsed as well.\n\nNote on indirect dependencies: normally dependencies are determined from\nimports, but since our type interfaces are \"opaque\" (i.e. symbol tables can\ncontain types identified by name), these are not enough. We *must* also\nadd \"indirect\" dependencies from types to their definitions. For this\npurpose, after we finished processing a module, we travers its type map and\nsymbol tables, and for each type we find (transitively) on which opaque/named\ntypes it depends.\n\nImport cycles\n-------------\n\nFinally we have to decide how to handle (b), import cycles. Here\nwe'll need a modified version of the original state machine\n(build.py), but we only need to do this per SCC, and we won't have to\ndeal with changes to the list of nodes while we're processing it.",
|
||||
"diff_context": "\nFor single nodes, processing is simple. If the node was cached, we\ndeserialize the cache data and fix up cross-references. Otherwise, we\ndo semantic analysis followed by type checking. We also handle (c)\nabove; if a module has valid cache data *but* any of its\ndependencies was processed from source, then the module should be\nprocessed from source.\n\nA relatively simple optimization (outside SCCs) we might do in the\nfuture is as follows: if a node's cache data is valid, but one or more\nof its dependencies are out of date so we have to re-parse the node\nfrom source, once we have fully type-checked the node, we can decide\nwhether its symbol table actually changed compared to the cache data\n(by reading the cache data and comparing it to the data we would be\nwriting). If there is no change we can declare the node up to date,\nand any node that depends (and for which we have cached data, and\nwhose other dependencies are up to date) on it won't need to be\nre-parsed from source.\ndo semantic analysis followed by type checking. Once we (re-)processed\nan SCC we check whether its interface (symbol table) is still fresh\n(matches previous cached value). If it is not, we consider dependent SCCs\nstale so that they need to be re-parsed as well.\n\nNote on indirect dependencies: normally dependencies are determined from\nimports, but since our type interfaces are \"opaque\" (i.e. symbol tables can\ncontain types identified by name), these are not enough. We *must* also\nadd \"indirect\" dependencies from types to their definitions. For this\npurpose, after we finished processing a module, we travers its type map and\nsymbol tables, and for each type we find (transitively) on which opaque/named\ntypes it depends.\n\nImport cycles\n-------------\n\nFinally we have to decide how to handle (c), import cycles. Here\nFinally we have to decide how to handle (b), import cycles. 
Here\nwe'll need a modified version of the original state machine\n(build.py), but we only need to do this per SCC, and we won't have to\ndeal with changes to the list of nodes while we're processing it.",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 10,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": "\n # We should always patch indirect dependencies, even in full (non-incremental) builds,\n # because the cache still may be written, and it must be correct.\n # TODO: find a more robust way to traverse *all* relevant types?\n all_types = list(self.type_map().values())\n for _, sym, _ in self.tree.local_definitions():\n if sym.type is not None:\n all_types.append(sym.type)\n if isinstance(sym.node, TypeInfo):\n # TypeInfo symbols have some extra relevant types.\n all_types.extend(sym.node.bases)\n if sym.node.metaclass_type:\n all_types.append(sym.node.metaclass_type)\n if sym.node.typeddict_type:\n all_types.append(sym.node.typeddict_type)\n if sym.node.tuple_type:\n all_types.append(sym.node.tuple_type)\n self._patch_indirect_dependencies(self.type_checker().module_refs, all_types)\n\n if self.options.dump_inference_stats:\n dump_type_stats(",
|
||||
"after_code": "\n # We should always patch indirect dependencies, even in full (non-incremental) builds,\n # because the cache still may be written, and it must be correct.\n all_types = set(self.type_map().values())\n for _, sym, _ in self.tree.local_definitions():\n if sym.type is not None:\n all_types.add(sym.type)\n # Special case: settable properties may have two types.\n if isinstance(sym.node, OverloadedFuncDef) and sym.node.is_property:\n assert isinstance(first_node := sym.node.items[0], Decorator)\n if first_node.var.setter_type:\n all_types.add(first_node.var.setter_type)\n # Using mod_alias_deps is unfortunate but needed, since it is highly impractical\n # (and practically impossible) to avoid all get_proper_type() calls. For example,\n # TypeInfo.bases and metaclass, *args and **kwargs, Overloaded.items, and trivial\n # aliases like Text = str, etc. all currently forced to proper types. Thus, we need\n # to record the original definitions as they are first seen in semanal.py.\n self._patch_indirect_dependencies(\n self.type_checker().module_refs | self.tree.mod_alias_deps, all_types\n )\n\n if self.options.dump_inference_stats:\n dump_type_stats(",
|
||||
"diff_context": "\n # We should always patch indirect dependencies, even in full (non-incremental) builds,\n # because the cache still may be written, and it must be correct.\n # TODO: find a more robust way to traverse *all* relevant types?\n all_types = list(self.type_map().values())\n all_types = set(self.type_map().values())\n for _, sym, _ in self.tree.local_definitions():\n if sym.type is not None:\n all_types.append(sym.type)\n if isinstance(sym.node, TypeInfo):\n # TypeInfo symbols have some extra relevant types.\n all_types.extend(sym.node.bases)\n if sym.node.metaclass_type:\n all_types.append(sym.node.metaclass_type)\n if sym.node.typeddict_type:\n all_types.append(sym.node.typeddict_type)\n if sym.node.tuple_type:\n all_types.append(sym.node.tuple_type)\n self._patch_indirect_dependencies(self.type_checker().module_refs, all_types)\n all_types.add(sym.type)\n # Special case: settable properties may have two types.\n if isinstance(sym.node, OverloadedFuncDef) and sym.node.is_property:\n assert isinstance(first_node := sym.node.items[0], Decorator)\n if first_node.var.setter_type:\n all_types.add(first_node.var.setter_type)\n # Using mod_alias_deps is unfortunate but needed, since it is highly impractical\n # (and practically impossible) to avoid all get_proper_type() calls. For example,\n # TypeInfo.bases and metaclass, *args and **kwargs, Overloaded.items, and trivial\n # aliases like Text = str, etc. all currently forced to proper types. Thus, we need\n # to record the original definitions as they are first seen in semanal.py.\n self._patch_indirect_dependencies(\n self.type_checker().module_refs | self.tree.mod_alias_deps, all_types\n )\n\n if self.options.dump_inference_stats:\n dump_type_stats(",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 8,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": " self._type_checker.reset()\n self._type_checker = None\n\n def _patch_indirect_dependencies(self, module_refs: set[str], types: list[Type]) -> None:\n assert None not in types\n valid = self.valid_references()\n",
|
||||
"after_code": " self._type_checker.reset()\n self._type_checker = None\n\n def _patch_indirect_dependencies(self, module_refs: set[str], types: set[Type]) -> None:\n assert None not in types\n valid = self.valid_references()\n",
|
||||
"diff_context": " self._type_checker.reset()\n self._type_checker = None\n\n def _patch_indirect_dependencies(self, module_refs: set[str], types: list[Type]) -> None:\n def _patch_indirect_dependencies(self, module_refs: set[str], types: set[Type]) -> None:\n assert None not in types\n valid = self.valid_references()\n",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "_patch_indirect_dependencies",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": " for id in scc:\n deps.update(graph[id].dependencies)\n deps -= ascc\n stale_deps = {id for id in deps if id in graph and not graph[id].is_interface_fresh()}\n fresh = fresh and not stale_deps\n undeps = set()\n if fresh:",
|
||||
"after_code": " for id in scc:\n deps.update(graph[id].dependencies)\n deps -= ascc\n # Note: if a dependency is not in graph anymore, it should be considered interface-stale.\n # This is important to trigger any relevant updates from indirect dependencies that were\n # removed in load_graph().\n stale_deps = {id for id in deps if id not in graph or not graph[id].is_interface_fresh()}\n fresh = fresh and not stale_deps\n undeps = set()\n if fresh:",
|
||||
"diff_context": " for id in scc:\n deps.update(graph[id].dependencies)\n deps -= ascc\n stale_deps = {id for id in deps if id in graph and not graph[id].is_interface_fresh()}\n # Note: if a dependency is not in graph anymore, it should be considered interface-stale.\n # This is important to trigger any relevant updates from indirect dependencies that were\n # removed in load_graph().\n stale_deps = {id for id in deps if id not in graph or not graph[id].is_interface_fresh()}\n fresh = fresh and not stale_deps\n undeps = set()\n if fresh:",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/indirection.py",
|
||||
"language": "python",
|
||||
"before_code": " def __init__(self) -> None:\n # Module references are collected here\n self.modules: set[str] = set()\n # User to avoid infinite recursion with recursive type aliases\n self.seen_aliases: set[types.TypeAliasType] = set()\n # Used to avoid redundant work\n self.seen_fullnames: set[str] = set()\n\n def find_modules(self, typs: Iterable[types.Type]) -> set[str]:\n self.modules = set()\n self.seen_fullnames = set()\n self.seen_aliases = set()\n for typ in typs:\n self._visit(typ)\n return self.modules\n\n def _visit(self, typ: types.Type) -> None:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ not in self.seen_aliases:\n self.seen_aliases.add(typ)\n typ.accept(self)\n\n def _visit_type_tuple(self, typs: tuple[types.Type, ...]) -> None:\n # Micro-optimization: Specialized version of _visit for lists\n for typ in typs:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ in self.seen_aliases:\n continue\n self.seen_aliases.add(typ)\n typ.accept(self)\n\n def _visit_type_list(self, typs: list[types.Type]) -> None:\n # Micro-optimization: Specialized version of _visit for tuples\n for typ in typs:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ in self.seen_aliases:\n continue\n self.seen_aliases.add(typ)\n typ.accept(self)\n\n def _visit_module_name(self, module_name: str) -> None:",
|
||||
"after_code": " def __init__(self) -> None:\n # Module references are collected here\n self.modules: set[str] = set()\n # User to avoid infinite recursion with recursive types\n self.seen_types: set[types.TypeAliasType | types.Instance] = set()\n # Used to avoid redundant work\n self.seen_fullnames: set[str] = set()\n\n def find_modules(self, typs: Iterable[types.Type]) -> set[str]:\n self.modules = set()\n self.seen_fullnames = set()\n self.seen_types = set()\n for typ in typs:\n self._visit(typ)\n return self.modules\n\n def _visit(self, typ: types.Type) -> None:\n # Note: instances are needed for `class str(Sequence[str]): ...`\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n return\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_type_tuple(self, typs: tuple[types.Type, ...]) -> None:\n # Micro-optimization: Specialized version of _visit for lists\n for typ in typs:\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n continue\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_type_list(self, typs: list[types.Type]) -> None:\n # Micro-optimization: Specialized version of _visit for tuples\n for typ in typs:\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n continue\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_module_name(self, module_name: str) -> None:",
|
||||
"diff_context": " def __init__(self) -> None:\n # Module references are collected here\n self.modules: set[str] = set()\n # User to avoid infinite recursion with recursive type aliases\n self.seen_aliases: set[types.TypeAliasType] = set()\n # User to avoid infinite recursion with recursive types\n self.seen_types: set[types.TypeAliasType | types.Instance] = set()\n # Used to avoid redundant work\n self.seen_fullnames: set[str] = set()\n\n def find_modules(self, typs: Iterable[types.Type]) -> set[str]:\n self.modules = set()\n self.seen_fullnames = set()\n self.seen_aliases = set()\n self.seen_types = set()\n for typ in typs:\n self._visit(typ)\n return self.modules\n\n def _visit(self, typ: types.Type) -> None:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ not in self.seen_aliases:\n self.seen_aliases.add(typ)\n # Note: instances are needed for `class str(Sequence[str]): ...`\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n return\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_type_tuple(self, typs: tuple[types.Type, ...]) -> None:\n # Micro-optimization: Specialized version of _visit for lists\n for typ in typs:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ in self.seen_aliases:\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n continue\n self.seen_aliases.add(typ)\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_type_list(self, typs: list[types.Type]) -> None:\n # Micro-optimization: Specialized version of _visit for tuples\n for typ in typs:\n if isinstance(typ, types.TypeAliasType):\n # Avoid 
infinite recursion for recursive type aliases.\n if typ in self.seen_aliases:\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n continue\n self.seen_aliases.add(typ)\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_module_name(self, module_name: str) -> None:",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 29,
|
||||
"function_name": "_visit_module_name",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression",
|
||||
"context_manager",
|
||||
"class_definition",
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/indirection.py",
|
||||
"language": "python",
|
||||
"before_code": " self._visit_type_list(t.arg_types)\n\n def visit_instance(self, t: types.Instance) -> None:\n self._visit_type_tuple(t.args)\n if t.type:\n # Uses of a class depend on everything in the MRO,\n # as changes to classes in the MRO can add types to methods,\n # change property types, change the MRO itself, etc.\n for s in t.type.mro:\n self._visit_module_name(s.module_name)\n if t.type.metaclass_type is not None:\n self._visit_module_name(t.type.metaclass_type.type.module_name)\n\n def visit_callable_type(self, t: types.CallableType) -> None:\n self._visit_type_list(t.arg_types)",
|
||||
"after_code": " self._visit_type_list(t.arg_types)\n\n def visit_instance(self, t: types.Instance) -> None:\n # Instance is named, record its definition and continue digging into\n # components that constitute semantic meaning of this type: bases, metaclass,\n # tuple type, and typeddict type.\n # Note: we cannot simply record the MRO, in case an intermediate base contains\n # a reference to type alias, this affects meaning of map_instance_to_supertype(),\n # see e.g. testDoubleReexportGenericUpdated.\n self._visit_type_tuple(t.args)\n if t.type:\n # Important optimization: instead of simply recording the definition and\n # recursing into bases, record the MRO and only traverse generic bases.\n for s in t.type.mro:\n self._visit_module_name(s.module_name)\n for base in s.bases:\n if base.args:\n self._visit_type_tuple(base.args)\n if t.type.metaclass_type:\n self._visit(t.type.metaclass_type)\n if t.type.typeddict_type:\n self._visit(t.type.typeddict_type)\n if t.type.tuple_type:\n self._visit(t.type.tuple_type)\n\n def visit_callable_type(self, t: types.CallableType) -> None:\n self._visit_type_list(t.arg_types)",
|
||||
"diff_context": " self._visit_type_list(t.arg_types)\n\n def visit_instance(self, t: types.Instance) -> None:\n # Instance is named, record its definition and continue digging into\n # components that constitute semantic meaning of this type: bases, metaclass,\n # tuple type, and typeddict type.\n # Note: we cannot simply record the MRO, in case an intermediate base contains\n # a reference to type alias, this affects meaning of map_instance_to_supertype(),\n # see e.g. testDoubleReexportGenericUpdated.\n self._visit_type_tuple(t.args)\n if t.type:\n # Uses of a class depend on everything in the MRO,\n # as changes to classes in the MRO can add types to methods,\n # change property types, change the MRO itself, etc.\n # Important optimization: instead of simply recording the definition and\n # recursing into bases, record the MRO and only traverse generic bases.\n for s in t.type.mro:\n self._visit_module_name(s.module_name)\n if t.type.metaclass_type is not None:\n self._visit_module_name(t.type.metaclass_type.type.module_name)\n for base in s.bases:\n if base.args:\n self._visit_type_tuple(base.args)\n if t.type.metaclass_type:\n self._visit(t.type.metaclass_type)\n if t.type.typeddict_type:\n self._visit(t.type.typeddict_type)\n if t.type.tuple_type:\n self._visit(t.type.tuple_type)\n\n def visit_callable_type(self, t: types.CallableType) -> None:\n self._visit_type_list(t.arg_types)",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 10,
|
||||
"function_name": "visit_callable_type",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/indirection.py",
|
||||
"language": "python",
|
||||
"before_code": " self.seen_fullnames.add(fullname)\n\n def visit_overloaded(self, t: types.Overloaded) -> None:\n self._visit_type_list(list(t.items))\n self._visit(t.fallback)\n\n def visit_tuple_type(self, t: types.TupleType) -> None:",
|
||||
"after_code": " self.seen_fullnames.add(fullname)\n\n def visit_overloaded(self, t: types.Overloaded) -> None:\n for item in t.items:\n self._visit(item)\n self._visit(t.fallback)\n\n def visit_tuple_type(self, t: types.TupleType) -> None:",
|
||||
"diff_context": " self.seen_fullnames.add(fullname)\n\n def visit_overloaded(self, t: types.Overloaded) -> None:\n self._visit_type_list(list(t.items))\n for item in t.items:\n self._visit(item)\n self._visit(t.fallback)\n\n def visit_tuple_type(self, t: types.TupleType) -> None:",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "visit_tuple_type",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/indirection.py",
|
||||
"language": "python",
|
||||
"before_code": " self._visit(t.item)\n\n def visit_type_alias_type(self, t: types.TypeAliasType) -> None:\n self._visit(types.get_proper_type(t))",
|
||||
"after_code": " self._visit(t.item)\n\n def visit_type_alias_type(self, t: types.TypeAliasType) -> None:\n # Type alias is named, record its definition and continue digging into\n # components that constitute semantic meaning of this type: target and args.\n if t.alias:\n self._visit_module_name(t.alias.module)\n self._visit(t.alias.target)\n self._visit_type_list(t.args)",
|
||||
"diff_context": " self._visit(t.item)\n\n def visit_type_alias_type(self, t: types.TypeAliasType) -> None:\n self._visit(types.get_proper_type(t))\n # Type alias is named, record its definition and continue digging into\n # components that constitute semantic meaning of this type: target and args.\n if t.alias:\n self._visit_module_name(t.alias.module)\n self._visit(t.alias.target)\n self._visit_type_list(t.args)",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 3,
|
||||
"function_name": "visit_type_alias_type",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/nodes.py",
|
||||
"language": "python",
|
||||
"before_code": " defs: list[Statement]\n # Type alias dependencies as mapping from target to set of alias full names\n alias_deps: defaultdict[str, set[str]]\n # Is there a UTF-8 BOM at the start?\n is_bom: bool\n names: SymbolTable",
|
||||
"after_code": " defs: list[Statement]\n # Type alias dependencies as mapping from target to set of alias full names\n alias_deps: defaultdict[str, set[str]]\n # Same as above but for coarse-grained dependencies (i.e. modules instead of full names)\n mod_alias_deps: set[str]\n # Is there a UTF-8 BOM at the start?\n is_bom: bool\n names: SymbolTable",
|
||||
"diff_context": " defs: list[Statement]\n # Type alias dependencies as mapping from target to set of alias full names\n alias_deps: defaultdict[str, set[str]]\n # Same as above but for coarse-grained dependencies (i.e. modules instead of full names)\n mod_alias_deps: set[str]\n # Is there a UTF-8 BOM at the start?\n is_bom: bool\n names: SymbolTable",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/nodes.py",
|
||||
"language": "python",
|
||||
"before_code": " target: The target type. For generic aliases contains bound type variables\n as nested types (currently TypeVar and ParamSpec are supported).\n _fullname: Qualified name of this type alias. This is used in particular\n to track fine grained dependencies from aliases.\n alias_tvars: Type variables used to define this alias.\n normalized: Used to distinguish between `A = List`, and `A = list`. Both\n are internally stored using `builtins.list` (because `typing.List` is",
|
||||
"after_code": " target: The target type. For generic aliases contains bound type variables\n as nested types (currently TypeVar and ParamSpec are supported).\n _fullname: Qualified name of this type alias. This is used in particular\n to track fine-grained dependencies from aliases.\n module: Module where the alias was defined.\n alias_tvars: Type variables used to define this alias.\n normalized: Used to distinguish between `A = List`, and `A = list`. Both\n are internally stored using `builtins.list` (because `typing.List` is",
|
||||
"diff_context": " target: The target type. For generic aliases contains bound type variables\n as nested types (currently TypeVar and ParamSpec are supported).\n _fullname: Qualified name of this type alias. This is used in particular\n to track fine grained dependencies from aliases.\n to track fine-grained dependencies from aliases.\n module: Module where the alias was defined.\n alias_tvars: Type variables used to define this alias.\n normalized: Used to distinguish between `A = List`, and `A = list`. Both\n are internally stored using `builtins.list` (because `typing.List` is",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/semanal.py",
|
||||
"language": "python",
|
||||
"before_code": " declared_type_vars: TypeVarLikeList | None = None,\n all_declared_type_params_names: list[str] | None = None,\n python_3_12_type_alias: bool = False,\n ) -> tuple[Type | None, list[TypeVarLikeType], set[str], list[str], bool]:\n \"\"\"Check if 'rvalue' is a valid type allowed for aliasing (e.g. not a type variable).\n\n If yes, return the corresponding type, a list of\n qualified type variable names for generic aliases, a set of names the alias depends on,\n and a list of type variables if the alias is generic.\n A schematic example for the dependencies:\n A = int\n B = str\n analyze_alias(Dict[A, B])[2] == {'__main__.A', '__main__.B'}\n \"\"\"\n dynamic = bool(self.function_stack and self.function_stack[-1].is_dynamic())\n global_scope = not self.type and not self.function_stack",
|
||||
"after_code": " declared_type_vars: TypeVarLikeList | None = None,\n all_declared_type_params_names: list[str] | None = None,\n python_3_12_type_alias: bool = False,\n ) -> tuple[Type | None, list[TypeVarLikeType], set[tuple[str, str]], bool]:\n \"\"\"Check if 'rvalue' is a valid type allowed for aliasing (e.g. not a type variable).\n\n If yes, return the corresponding type, a list of type variables for generic aliases,\n a set of names the alias depends on, and True if the original type has empty tuple index.\n An example for the dependencies:\n A = int\n B = str\n analyze_alias(dict[A, B])[2] == {('mod', 'mod.A'), ('mod', 'mod.B')}\n \"\"\"\n dynamic = bool(self.function_stack and self.function_stack[-1].is_dynamic())\n global_scope = not self.type and not self.function_stack",
|
||||
"diff_context": " declared_type_vars: TypeVarLikeList | None = None,\n all_declared_type_params_names: list[str] | None = None,\n python_3_12_type_alias: bool = False,\n ) -> tuple[Type | None, list[TypeVarLikeType], set[str], list[str], bool]:\n ) -> tuple[Type | None, list[TypeVarLikeType], set[tuple[str, str]], bool]:\n \"\"\"Check if 'rvalue' is a valid type allowed for aliasing (e.g. not a type variable).\n\n If yes, return the corresponding type, a list of\n qualified type variable names for generic aliases, a set of names the alias depends on,\n and a list of type variables if the alias is generic.\n A schematic example for the dependencies:\n If yes, return the corresponding type, a list of type variables for generic aliases,\n a set of names the alias depends on, and True if the original type has empty tuple index.\n An example for the dependencies:\n A = int\n B = str\n analyze_alias(Dict[A, B])[2] == {'__main__.A', '__main__.B'}\n analyze_alias(dict[A, B])[2] == {('mod', 'mod.A'), ('mod', 'mod.B')}\n \"\"\"\n dynamic = bool(self.function_stack and self.function_stack[-1].is_dynamic())\n global_scope = not self.type and not self.function_stack",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 10,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": "\"\"\"Check if 'rvalue' is a valid type allowed for aliasing (e.g. not a type variable).",
|
||||
"coding_patterns": [
|
||||
"list_comprehension"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/semanal.py",
|
||||
"language": "python",
|
||||
"before_code": " self.cur_mod_node.plugin_deps.setdefault(trigger, set()).add(target)\n\n def add_type_alias_deps(\n self, aliases_used: Collection[str], target: str | None = None\n ) -> None:\n \"\"\"Add full names of type aliases on which the current node depends.\n\n This is used by fine-grained incremental mode to re-check the corresponding nodes.\n If `target` is None, then the target node used will be the current scope.\n \"\"\"\n if not aliases_used:\n # A basic optimization to avoid adding targets with no dependencies to\n # the `alias_deps` dict.\n return\n if target is None:\n target = self.scope.current_target()\n self.cur_mod_node.alias_deps[target].update(aliases_used)\n\n def is_mangled_global(self, name: str) -> bool:\n # A global is mangled if there exists at least one renamed variant.",
|
||||
"after_code": " self.cur_mod_node.plugin_deps.setdefault(trigger, set()).add(target)\n\n def add_type_alias_deps(\n self, aliases_used: Collection[tuple[str, str]], target: str | None = None\n ) -> None:\n \"\"\"Add full names of type aliases on which the current node depends.\n\n This is used by fine-grained incremental mode to re-check the corresponding nodes.\n If `target` is None, then the target node used will be the current scope. For\n coarse-grained mode, add just the module names where aliases are defined.\n \"\"\"\n if not aliases_used:\n return\n if target is None:\n target = self.scope.current_target()\n for mod, fn in aliases_used:\n self.cur_mod_node.alias_deps[target].add(fn)\n self.cur_mod_node.mod_alias_deps.add(mod)\n\n def is_mangled_global(self, name: str) -> bool:\n # A global is mangled if there exists at least one renamed variant.",
|
||||
"diff_context": " self.cur_mod_node.plugin_deps.setdefault(trigger, set()).add(target)\n\n def add_type_alias_deps(\n self, aliases_used: Collection[str], target: str | None = None\n self, aliases_used: Collection[tuple[str, str]], target: str | None = None\n ) -> None:\n \"\"\"Add full names of type aliases on which the current node depends.\n\n This is used by fine-grained incremental mode to re-check the corresponding nodes.\n If `target` is None, then the target node used will be the current scope.\n If `target` is None, then the target node used will be the current scope. For\n coarse-grained mode, add just the module names where aliases are defined.\n \"\"\"\n if not aliases_used:\n # A basic optimization to avoid adding targets with no dependencies to\n # the `alias_deps` dict.\n return\n if target is None:\n target = self.scope.current_target()\n self.cur_mod_node.alias_deps[target].update(aliases_used)\n for mod, fn in aliases_used:\n self.cur_mod_node.alias_deps[target].add(fn)\n self.cur_mod_node.mod_alias_deps.add(mod)\n\n def is_mangled_global(self, name: str) -> bool:\n # A global is mangled if there exists at least one renamed variant.",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 15,
|
||||
"function_name": "is_mangled_global",
|
||||
"class_name": null,
|
||||
"docstring": "\"\"\"Add full names of type aliases on which the current node depends.",
|
||||
"coding_patterns": [
|
||||
"list_comprehension"
|
||||
]
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "chore: add cline_docs/ to .gitignore",
|
||||
"description": "Cline is a commonly used LLM tool which, under certain conditions, creates a cline_docs/ folder with task status and todo items etc\r\n\r\nThis folder is only helpful locally (unless we decide we want to add actual guidelines for Cline here, but thats outside the scope of this PR) so this PR adds it to .gitignore\r\n\r\n<!-- If this pull request fixes an issue, add \"Fixes #NNN\" with the issue number. -->\r\n\r\n<!--\r\nChecklist:\r\n- Read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)\r\n- Add tests for all changed behaviour.\r\n- If you can't add a test, please explain why and how you verified your changes work.\r\n- Make sure CI passes.\r\n- Please do not force push to the PR once it has been reviewed.\r\n-->\r\n",
|
||||
"url": "https://github.com/python/mypy/pull/19797",
|
||||
"date": "2025-09-05T02:35:14Z",
|
||||
"sha_or_number": "19797",
|
||||
"files_changed": [
|
||||
".gitignore"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "[mypyc] Add type annotations to tests",
|
||||
"description": "Missing type annotations can compromise test coverage. My eventual goal is to require annotations by default in all run tests.\r\n",
|
||||
"url": "https://github.com/python/mypy/pull/19794",
|
||||
"date": "2025-09-04T15:56:30Z",
|
||||
"sha_or_number": "19794",
|
||||
"files_changed": [
|
||||
"mypyc/test-data/fixtures/ir.py",
|
||||
"mypyc/test-data/fixtures/typing-full.pyi",
|
||||
"mypyc/test-data/run-dunders.test",
|
||||
"mypyc/test-data/run-singledispatch.test"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/ir.py",
|
||||
"language": "python",
|
||||
"before_code": " def __iadd__(self, value: Iterable[_T], /) -> List[_T]: ... # type: ignore[misc]\n def append(self, x: _T) -> None: pass\n def pop(self, i: int = -1) -> _T: pass\n def count(self, _T) -> int: pass\n def extend(self, l: Iterable[_T]) -> None: pass\n def insert(self, i: int, x: _T) -> None: pass\n def sort(self) -> None: pass",
|
||||
"after_code": " def __iadd__(self, value: Iterable[_T], /) -> List[_T]: ... # type: ignore[misc]\n def append(self, x: _T) -> None: pass\n def pop(self, i: int = -1) -> _T: pass\n def count(self, x: _T) -> int: pass\n def extend(self, l: Iterable[_T]) -> None: pass\n def insert(self, i: int, x: _T) -> None: pass\n def sort(self) -> None: pass",
|
||||
"diff_context": " def __iadd__(self, value: Iterable[_T], /) -> List[_T]: ... # type: ignore[misc]\n def append(self, x: _T) -> None: pass\n def pop(self, i: int = -1) -> _T: pass\n def count(self, _T) -> int: pass\n def count(self, x: _T) -> int: pass\n def extend(self, l: Iterable[_T]) -> None: pass\n def insert(self, i: int, x: _T) -> None: pass\n def sort(self) -> None: pass",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "sort",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition",
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/ir.py",
|
||||
"language": "python",
|
||||
"before_code": "def id(o: object) -> int: pass\n# This type is obviously wrong but the test stubs don't have Sized anymore\ndef len(o: object) -> int: pass\ndef print(*object) -> None: pass\ndef isinstance(x: object, t: object) -> bool: pass\ndef iter(i: Iterable[_T]) -> Iterator[_T]: pass\n@overload",
|
||||
"after_code": "def id(o: object) -> int: pass\n# This type is obviously wrong but the test stubs don't have Sized anymore\ndef len(o: object) -> int: pass\ndef print(*args: object) -> None: pass\ndef isinstance(x: object, t: object) -> bool: pass\ndef iter(i: Iterable[_T]) -> Iterator[_T]: pass\n@overload",
|
||||
"diff_context": "def id(o: object) -> int: pass\n# This type is obviously wrong but the test stubs don't have Sized anymore\ndef len(o: object) -> int: pass\ndef print(*object) -> None: pass\ndef print(*args: object) -> None: pass\ndef isinstance(x: object, t: object) -> bool: pass\ndef iter(i: Iterable[_T]) -> Iterator[_T]: pass\n@overload",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "iter",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition",
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/typing-full.pyi",
|
||||
"language": "python",
|
||||
"before_code": "class GenericMeta(type): pass\n\nclass _SpecialForm:\n def __getitem__(self, index): ...\nclass TypeVar:\n def __init__(self, name, *args, bound=None): ...\n def __or__(self, other): ...\n\ncast = 0\noverload = 0",
|
||||
"after_code": "class GenericMeta(type): pass\n\nclass _SpecialForm:\n def __getitem__(self, index: Any) -> Any: ...\nclass TypeVar:\n def __init__(self, name: str, *args: Any, bound: Any = None): ...\n def __or__(self, other: Any) -> Any: ...\n\ncast = 0\noverload = 0",
|
||||
"diff_context": "class GenericMeta(type): pass\n\nclass _SpecialForm:\n def __getitem__(self, index): ...\n def __getitem__(self, index: Any) -> Any: ...\nclass TypeVar:\n def __init__(self, name, *args, bound=None): ...\n def __or__(self, other): ...\n def __init__(self, name: str, *args: Any, bound: Any = None): ...\n def __or__(self, other: Any) -> Any: ...\n\ncast = 0\noverload = 0",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 7,
|
||||
"function_name": "__or__",
|
||||
"class_name": "TypeVar",
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition",
|
||||
"type_hint"
|
||||
]
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "Check functions without annotations in mypyc tests",
|
||||
"description": "c.f. https://github.com/python/mypy/pull/19217#discussion_r2314303410\r\n\r\nDisallowing functions without annotations (where not relevant to the tests) is probably a good idea, but this creates a large number of failures which would take some time to go through (many due to common issues, like untyped functions in the fixtures).\r\n\r\nAs a smaller step in the right direction, this sets `check_untyped_defs = True` for the `run-*` tests so that we at least check functions without annotations. ",
|
||||
"url": "https://github.com/python/mypy/pull/19792",
|
||||
"date": "2025-09-04T14:42:17Z",
|
||||
"sha_or_number": "19792",
|
||||
"files_changed": [
|
||||
"mypyc/test-data/fixtures/ir.py",
|
||||
"mypyc/test-data/run-classes.test",
|
||||
"mypyc/test/test_run.py"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/ir.py",
|
||||
"language": "python",
|
||||
"before_code": "class type:\n def __init__(self, o: object) -> None: ...\n def __or__(self, o: object) -> Any: ...\n __name__ : str\n __annotations__: Dict[str, Any]\n",
|
||||
"after_code": "class type:\n def __init__(self, o: object) -> None: ...\n def __or__(self, o: object) -> Any: ...\n def __new__(cls, *args: object) -> Any: ...\n __name__ : str\n __annotations__: Dict[str, Any]\n",
|
||||
"diff_context": "class type:\n def __init__(self, o: object) -> None: ...\n def __or__(self, o: object) -> Any: ...\n def __new__(cls, *args: object) -> Any: ...\n __name__ : str\n __annotations__: Dict[str, Any]\n",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "__new__",
|
||||
"class_name": "type",
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition"
|
||||
]
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "fix: Allow instantiation of type[None] in analyze_type_type_callee",
|
||||
"description": "<!-- If this pull request fixes an issue, add \"Fixes #NNN\" with the issue number. -->\r\n\r\n(Explain how this PR changes mypy.)\r\n\r\n<!--\r\nChecklist:\r\n- Read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)\r\n- Add tests for all changed behaviour.\r\n- If you can't add a test, please explain why and how you verified your changes work.\r\n- Make sure CI passes.\r\n- Please do not force push to the PR once it has been reviewed.\r\n-->\r\n\r\nFixes #19660\r\n\r\nAllow instantiation of NoneType in type checker\r\n\r\nThis change fixes the error \"Cannot instantiate type 'Type[None]'\"\r\nwhen calling NoneType() or type(None)().\r\n\r\nBy treating NoneType as a callable that returns None, mypy can now correctly\r\nhandle such calls without raising spurious errors.\r\n\r\nAlso, I added test case testTypeUsingTypeCNoneType covering:\r\n- direct calls to type(None)() and NoneType()\r\n- functions accepting type[None] and type[NoneType] parameters and invoking them\r\n\r\nThis ensures proper handling of NoneType instantiation and prevents spurious errors.",
|
||||
"url": "https://github.com/python/mypy/pull/19782",
|
||||
"date": "2025-09-02T06:13:12Z",
|
||||
"sha_or_number": "19782",
|
||||
"files_changed": [
|
||||
"mypy/checkexpr.py",
|
||||
"test-data/unit/check-classes.test"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"19660"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypy/checkexpr.py",
|
||||
"language": "python",
|
||||
"before_code": " return self.analyze_type_type_callee(tuple_fallback(item), context)\n if isinstance(item, TypedDictType):\n return self.typeddict_callable_from_context(item)\n\n self.msg.unsupported_type_type(item, context)\n return AnyType(TypeOfAny.from_error)",
|
||||
"after_code": " return self.analyze_type_type_callee(tuple_fallback(item), context)\n if isinstance(item, TypedDictType):\n return self.typeddict_callable_from_context(item)\n if isinstance(item, NoneType):\n # NoneType() returns None, so treat it as a callable that returns None\n return CallableType(\n arg_types=[],\n arg_kinds=[],\n arg_names=[],\n ret_type=NoneType(),\n fallback=self.named_type(\"builtins.function\"),\n name=None,\n from_type_type=True,\n )\n\n self.msg.unsupported_type_type(item, context)\n return AnyType(TypeOfAny.from_error)",
|
||||
"diff_context": " return self.analyze_type_type_callee(tuple_fallback(item), context)\n if isinstance(item, TypedDictType):\n return self.typeddict_callable_from_context(item)\n if isinstance(item, NoneType):\n # NoneType() returns None, so treat it as a callable that returns None\n return CallableType(\n arg_types=[],\n arg_kinds=[],\n arg_names=[],\n ret_type=NoneType(),\n fallback=self.named_type(\"builtins.function\"),\n name=None,\n from_type_type=True,\n )\n\n self.msg.unsupported_type_type(item, context)\n return AnyType(TypeOfAny.from_error)",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "standard",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "feat: new mypyc primitives for weakref.proxy",
|
||||
"description": "This PR adds 2 new weakref primitives for weakref.proxy (1 and 2 arg)\r\n\r\nThe C code generates correctly, but I'm not entirely sure why this test is failing. The weakly-proxied object is being destroyed too early, while there should still be a strong reference to it. It also fails if we use the builtin weakref.proxy, so I believe this might be exposing a reference counting bug unrelated to this PR.\r\n\r\n<!--\r\nChecklist:\r\n- Read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)\r\n- Add tests for all changed behaviour.\r\n- If you can't add a test, please explain why and how you verified your changes work.\r\n- Make sure CI passes.\r\n- Please do not force push to the PR once it has been reviewed.\r\n-->\r\n",
|
||||
"url": "https://github.com/python/mypy/pull/19217",
|
||||
"date": "2025-06-03T17:02:26Z",
|
||||
"sha_or_number": "19217",
|
||||
"files_changed": [
|
||||
"mypyc/primitives/weakref_ops.py",
|
||||
"mypyc/test-data/fixtures/ir.py",
|
||||
"mypyc/test-data/irbuild-weakref.test",
|
||||
"mypyc/test-data/run-weakref.test",
|
||||
"test-data/unit/lib-stub/_weakref.pyi",
|
||||
"test-data/unit/lib-stub/weakref.pyi"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/ir.py",
|
||||
"language": "python",
|
||||
"before_code": "class UnicodeEncodeError(RuntimeError): pass\nclass UnicodeDecodeError(RuntimeError): pass\nclass NotImplementedError(RuntimeError): pass\n\nclass StopIteration(Exception):\n value: Any",
|
||||
"after_code": "class UnicodeEncodeError(RuntimeError): pass\nclass UnicodeDecodeError(RuntimeError): pass\nclass NotImplementedError(RuntimeError): pass\nclass ReferenceError(Exception): pass\n\nclass StopIteration(Exception):\n value: Any",
|
||||
"diff_context": "class UnicodeEncodeError(RuntimeError): pass\nclass UnicodeDecodeError(RuntimeError): pass\nclass NotImplementedError(RuntimeError): pass\nclass ReferenceError(Exception): pass\n\nclass StopIteration(Exception):\n value: Any",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": "StopIteration",
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"class_definition",
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "test-data/unit/lib-stub/_weakref.pyi",
|
||||
"language": "python",
|
||||
"before_code": "",
|
||||
"after_code": "from typing import Any, Callable, TypeVar, overload\nfrom weakref import CallableProxyType, ProxyType\n\n_C = TypeVar(\"_C\", bound=Callable[..., Any])\n_T = TypeVar(\"_T\")\n\n# Return CallableProxyType if object is callable, ProxyType otherwise\n@overload\ndef proxy(object: _C, callback: Callable[[CallableProxyType[_C]], Any] | None = None, /) -> CallableProxyType[_C]: ...\n@overload\ndef proxy(object: _T, callback: Callable[[ProxyType[_T]], Any] | None = None, /) -> ProxyType[_T]: ...",
|
||||
"diff_context": "from typing import Any, Callable, TypeVar, overload\nfrom weakref import CallableProxyType, ProxyType\n\n_C = TypeVar(\"_C\", bound=Callable[..., Any])\n_T = TypeVar(\"_T\")\n\n# Return CallableProxyType if object is callable, ProxyType otherwise\n@overload\ndef proxy(object: _C, callback: Callable[[CallableProxyType[_C]], Any] | None = None, /) -> CallableProxyType[_C]: ...\n@overload\ndef proxy(object: _T, callback: Callable[[ProxyType[_T]], Any] | None = None, /) -> ProxyType[_T]: ...",
|
||||
"change_type": "addition",
|
||||
"lines_of_context": 0,
|
||||
"function_name": "proxy",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"decorator",
|
||||
"function_definition"
|
||||
]
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "gh-128307: Update what's new in 3.13 and 3.14 with create_task changes of asyncio (#134304)",
|
||||
"description": "gh-128307: Update what's new in 3.13 and 3.14 with create_task changes of asyncio (#134304)\n\nCo-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>",
|
||||
"url": "https://github.com/python/cpython/commit/28625d4f956f8d30671aba1daaac9735932983db",
|
||||
"date": "2025-05-20T08:41:22Z",
|
||||
"sha_or_number": "28625d4f956f8d30671aba1daaac9735932983db",
|
||||
"files_changed": [
|
||||
"Doc/whatsnew/3.13.rst",
|
||||
"Doc/whatsnew/3.14.rst"
|
||||
],
|
||||
"additions": 34,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"134304"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "Doc/whatsnew/3.13.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": " never awaited).\n (Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.)\n\n\nbase64\n------",
|
||||
"after_code": " never awaited).\n (Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.)\n\n* The function and methods named ``create_task`` have received a new\n ``**kwargs`` argument that is passed through to the task constructor.\n This change was accidentally added in 3.13.3,\n and broke the API contract for custom task factories.\n Several third-party task factories implemented workarounds for this.\n In 3.13.4 and later releases the old factory contract is honored\n once again (until 3.14).\n To keep the workarounds working, the extra ``**kwargs`` argument still\n allows passing additional keyword arguments to :class:`~asyncio.Task`\n and to custom task factories.\n\n This affects the following function and methods:\n :meth:`asyncio.create_task`,\n :meth:`asyncio.loop.create_task`,\n :meth:`asyncio.TaskGroup.create_task`.\n (Contributed by Thomas Grainger in :gh:`128307`.)\n\nbase64\n------",
|
||||
"diff_context": " never awaited).\n (Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.)\n\n* The function and methods named ``create_task`` have received a new\n ``**kwargs`` argument that is passed through to the task constructor.\n This change was accidentally added in 3.13.3,\n and broke the API contract for custom task factories.\n Several third-party task factories implemented workarounds for this.\n In 3.13.4 and later releases the old factory contract is honored\n once again (until 3.14).\n To keep the workarounds working, the extra ``**kwargs`` argument still\n allows passing additional keyword arguments to :class:`~asyncio.Task`\n and to custom task factories.\n\n This affects the following function and methods:\n :meth:`asyncio.create_task`,\n :meth:`asyncio.loop.create_task`,\n :meth:`asyncio.TaskGroup.create_task`.\n (Contributed by Thomas Grainger in :gh:`128307`.)\n\nbase64\n------",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "Doc/whatsnew/3.14.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": " (Contributed by Semyon Moroz in :gh:`133367`.)\n\n\nbdb\n---\n",
|
||||
"after_code": " (Contributed by Semyon Moroz in :gh:`133367`.)\n\n\nasyncio\n-------\n\n* The function and methods named :func:`!create_task` now take an arbitrary\n list of keyword arguments. All keyword arguments are passed to the\n :class:`~asyncio.Task` constructor or the custom task factory.\n (See :meth:`~asyncio.loop.set_task_factory` for details.)\n The ``name`` and ``context`` keyword arguments are no longer special;\n the name should now be set using the ``name`` keyword argument of the factory,\n and ``context`` may be ``None``.\n\n This affects the following function and methods:\n :meth:`asyncio.create_task`,\n :meth:`asyncio.loop.create_task`,\n :meth:`asyncio.TaskGroup.create_task`.\n (Contributed by Thomas Grainger in :gh:`128307`.)\n\n\nbdb\n---\n",
|
||||
"diff_context": " (Contributed by Semyon Moroz in :gh:`133367`.)\n\n\nasyncio\n-------\n\n* The function and methods named :func:`!create_task` now take an arbitrary\n list of keyword arguments. All keyword arguments are passed to the\n :class:`~asyncio.Task` constructor or the custom task factory.\n (See :meth:`~asyncio.loop.set_task_factory` for details.)\n The ``name`` and ``context`` keyword arguments are no longer special;\n the name should now be set using the ``name`` keyword argument of the factory,\n and ``context`` may be ``None``.\n\n This affects the following function and methods:\n :meth:`asyncio.create_task`,\n :meth:`asyncio.loop.create_task`,\n :meth:`asyncio.TaskGroup.create_task`.\n (Contributed by Thomas Grainger in :gh:`128307`.)\n\n\nbdb\n---\n",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "Update CODEOWNERS (#126005)",
|
||||
"description": "Update CODEOWNERS (#126005)",
|
||||
"url": "https://github.com/python/cpython/commit/905eddceb2d61da9087f0d303aa7e4a405d2261a",
|
||||
"date": "2024-10-26T15:24:51Z",
|
||||
"sha_or_number": "905eddceb2d61da9087f0d303aa7e4a405d2261a",
|
||||
"files_changed": [
|
||||
".github/CODEOWNERS"
|
||||
],
|
||||
"additions": 2,
|
||||
"deletions": 2,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"126005"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "concise_subject; imperative_mood; references_issue",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "Withdraw most of my ownership in favor of Mark (#119611)",
|
||||
"description": "Withdraw most of my ownership in favor of Mark (#119611)",
|
||||
"url": "https://github.com/python/cpython/commit/3ff06ebec4e8b466f76078aa9c97cea2093d52ab",
|
||||
"date": "2024-05-27T18:07:16Z",
|
||||
"sha_or_number": "3ff06ebec4e8b466f76078aa9c97cea2093d52ab",
|
||||
"files_changed": [
|
||||
".github/CODEOWNERS"
|
||||
],
|
||||
"additions": 6,
|
||||
"deletions": 6,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"119611"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "references_issue",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "gh-117549: Don't use designated initializers in headers (#118580)",
|
||||
"description": "gh-117549: Don't use designated initializers in headers (#118580)\n\nThe designated initializer syntax in static inline functions in pycore_backoff.h\r\ncauses problems for C++ or MSVC users who aren't yet using C++20.\r\nWhile internal, pycore_backoff.h is included (indirectly, via pycore_code.h)\r\nby some key 3rd party software that does so for speed.",
|
||||
"url": "https://github.com/python/cpython/commit/40cc809902304f60c6e1c933191dd4d64e570e28",
|
||||
"date": "2024-05-05T19:28:55Z",
|
||||
"sha_or_number": "40cc809902304f60c6e1c933191dd4d64e570e28",
|
||||
"files_changed": [
|
||||
"Include/internal/pycore_backoff.h",
|
||||
"Misc/NEWS.d/next/Core and Builtins/2024-05-05-12-04-02.gh-issue-117549.kITawD.rst"
|
||||
],
|
||||
"additions": 12,
|
||||
"deletions": 2,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"118580"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "Misc/NEWS.d/next/Core and Builtins/2024-05-05-12-04-02.gh-issue-117549.kITawD.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": "",
|
||||
"after_code": "Don't use designated initializer syntax in inline functions in internal\nheaders. They cause problems for C++ or MSVC users who aren't yet using the\nlatest C++ standard (C++20). While internal, pycore_backoff.h, is included\n(indirectly, via pycore_code.h) by some key 3rd party software that does so\nfor speed.",
|
||||
"diff_context": "Don't use designated initializer syntax in inline functions in internal\nheaders. They cause problems for C++ or MSVC users who aren't yet using the\nlatest C++ standard (C++20). While internal, pycore_backoff.h, is included\n(indirectly, via pycore_code.h) by some key 3rd party software that does so\nfor speed.",
|
||||
"change_type": "addition",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "gh-74929: Rudimentary docs for PEP 667 (#118581)",
|
||||
"description": "gh-74929: Rudimentary docs for PEP 667 (#118581)\n\nThis is *not* sufficient for the final 3.13 release, but it will do for beta 1:\r\n\r\n- What's new entry\r\n- Updated changelog entry (news blurb)\r\n- Mention the proxy for f_globals in the datamodel and Python frame object docs\r\n\r\nThis doesn't have any C API details (what's new refers to the PEP).",
|
||||
"url": "https://github.com/python/cpython/commit/9c13d9e37a194f574b8591da634bf98419786448",
|
||||
"date": "2024-05-05T15:31:26Z",
|
||||
"sha_or_number": "9c13d9e37a194f574b8591da634bf98419786448",
|
||||
"files_changed": [
|
||||
"Doc/c-api/frame.rst",
|
||||
"Doc/reference/datamodel.rst",
|
||||
"Doc/whatsnew/3.13.rst",
|
||||
"Misc/NEWS.d/next/Core and Builtins/2024-04-27-21-44-40.gh-issue-74929.C2nESp.rst"
|
||||
],
|
||||
"additions": 22,
|
||||
"deletions": 3,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"118581"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "Doc/c-api/frame.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": "\n.. c:function:: PyObject* PyFrame_GetLocals(PyFrameObject *frame)\n\n Get the *frame*'s :attr:`~frame.f_locals` attribute (:class:`dict`).\n\n Return a :term:`strong reference`.\n\n .. versionadded:: 3.11\n\n\n.. c:function:: int PyFrame_GetLineNumber(PyFrameObject *frame)\n",
|
||||
"after_code": "\n.. c:function:: PyObject* PyFrame_GetLocals(PyFrameObject *frame)\n\n Get the *frame*'s :attr:`~frame.f_locals` attribute.\n If the frame refers to a function or comprehension, this returns\n a write-through proxy object that allows modifying the locals.\n In all other cases (classes, modules) it returns the :class:`dict`\n representing the frame locals directly.\n\n Return a :term:`strong reference`.\n\n .. versionadded:: 3.11\n\n .. versionchanged:: 3.13\n Return a proxy object for functions and comprehensions.\n\n\n.. c:function:: int PyFrame_GetLineNumber(PyFrameObject *frame)\n",
|
||||
"diff_context": "\n.. c:function:: PyObject* PyFrame_GetLocals(PyFrameObject *frame)\n\n Get the *frame*'s :attr:`~frame.f_locals` attribute (:class:`dict`).\n Get the *frame*'s :attr:`~frame.f_locals` attribute.\n If the frame refers to a function or comprehension, this returns\n a write-through proxy object that allows modifying the locals.\n In all other cases (classes, modules) it returns the :class:`dict`\n representing the frame locals directly.\n\n Return a :term:`strong reference`.\n\n .. versionadded:: 3.11\n\n .. versionchanged:: 3.13\n Return a proxy object for functions and comprehensions.\n\n\n.. c:function:: int PyFrame_GetLineNumber(PyFrameObject *frame)\n",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 11,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "Doc/reference/datamodel.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": "\n * - .. attribute:: frame.f_locals\n - The dictionary used by the frame to look up\n :ref:`local variables <naming>`\n\n * - .. attribute:: frame.f_globals\n - The dictionary used by the frame to look up",
|
||||
"after_code": "\n * - .. attribute:: frame.f_locals\n - The dictionary used by the frame to look up\n :ref:`local variables <naming>`.\n If the frame refers to a function or comprehension,\n this may return a write-through proxy object.\n\n .. versionchanged:: 3.13\n Return a proxy for functions and comprehensions.\n\n * - .. attribute:: frame.f_globals\n - The dictionary used by the frame to look up",
|
||||
"diff_context": "\n * - .. attribute:: frame.f_locals\n - The dictionary used by the frame to look up\n :ref:`local variables <naming>`\n :ref:`local variables <naming>`.\n If the frame refers to a function or comprehension,\n this may return a write-through proxy object.\n\n .. versionchanged:: 3.13\n Return a proxy for functions and comprehensions.\n\n * - .. attribute:: frame.f_globals\n - The dictionary used by the frame to look up",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "Doc/whatsnew/3.13.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": " Performance improvements are modest -- we expect to be improving this\n over the next few releases.\n\nNew typing features:\n\n* :pep:`696`: Type parameters (:data:`typing.TypeVar`, :data:`typing.ParamSpec`,",
|
||||
"after_code": " Performance improvements are modest -- we expect to be improving this\n over the next few releases.\n\n* :pep:`667`: :attr:`FrameType.f_locals <frame.f_locals>` when used in\n a function now returns a write-through proxy to the frame's locals,\n rather than a ``dict``. See the PEP for corresponding C API changes\n and deprecations.\n\nNew typing features:\n\n* :pep:`696`: Type parameters (:data:`typing.TypeVar`, :data:`typing.ParamSpec`,",
|
||||
"diff_context": " Performance improvements are modest -- we expect to be improving this\n over the next few releases.\n\n* :pep:`667`: :attr:`FrameType.f_locals <frame.f_locals>` when used in\n a function now returns a write-through proxy to the frame's locals,\n rather than a ``dict``. See the PEP for corresponding C API changes\n and deprecations.\n\nNew typing features:\n\n* :pep:`696`: Type parameters (:data:`typing.TypeVar`, :data:`typing.ParamSpec`,",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject; references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "gh-118335: Rename --experimental-interpreter on Windows to --experimental-jit-interpreter (#118497)",
|
||||
"description": "gh-118335: Rename --experimental-interpreter on Windows to --experimental-jit-interpreter (#118497)\n\nAlso fix docs for this in whatsnew.",
|
||||
"url": "https://github.com/python/cpython/commit/a37b0932285b5e883b13a46ff2a32f15d7339894",
|
||||
"date": "2024-05-02T00:48:34Z",
|
||||
"sha_or_number": "a37b0932285b5e883b13a46ff2a32f15d7339894",
|
||||
"files_changed": [
|
||||
"Doc/whatsnew/3.13.rst",
|
||||
"PCbuild/build.bat"
|
||||
],
|
||||
"additions": 5,
|
||||
"deletions": 4,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"118497"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Support TypeGuard (PEP 647) (#9865)",
|
||||
"description": "Support TypeGuard (PEP 647) (#9865)\n\nPEP 647 is still in draft mode, but it is likely to be accepted, and this helps solve some real issues.",
|
||||
"url": "https://github.com/python/mypy/commit/fffbe88fc54807c8b10ac40456522ad2faf8d350",
|
||||
"date": "2021-01-18T18:13:36Z",
|
||||
"sha_or_number": "fffbe88fc54807c8b10ac40456522ad2faf8d350",
|
||||
"files_changed": [
|
||||
"mypy/checker.py",
|
||||
"mypy/checkexpr.py",
|
||||
"mypy/constraints.py",
|
||||
"mypy/expandtype.py",
|
||||
"mypy/fixup.py",
|
||||
"mypy/nodes.py",
|
||||
"mypy/test/testcheck.py",
|
||||
"mypy/typeanal.py",
|
||||
"mypy/types.py",
|
||||
"test-data/unit/check-python38.test",
|
||||
"test-data/unit/check-serialize.test",
|
||||
"test-data/unit/check-typeguard.test",
|
||||
"test-data/unit/lib-stub/typing_extensions.pyi"
|
||||
],
|
||||
"additions": 408,
|
||||
"deletions": 9,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9865"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypy/checker.py",
|
||||
"language": "python",
|
||||
"before_code": " if literal(expr) == LITERAL_TYPE:\n vartype = type_map[expr]\n return self.conditional_callable_type_map(expr, vartype)\n elif isinstance(node, ComparisonExpr):\n # Step 1: Obtain the types of each operand and whether or not we can\n # narrow their types. (For example, we shouldn't try narrowing the",
|
||||
"after_code": " if literal(expr) == LITERAL_TYPE:\n vartype = type_map[expr]\n return self.conditional_callable_type_map(expr, vartype)\n elif isinstance(node.callee, RefExpr):\n if node.callee.type_guard is not None:\n # TODO: Follow keyword args or *args, **kwargs\n if node.arg_kinds[0] != nodes.ARG_POS:\n self.fail(\"Type guard requires positional argument\", node)\n return {}, {}\n if literal(expr) == LITERAL_TYPE:\n return {expr: TypeGuardType(node.callee.type_guard)}, {}\n elif isinstance(node, ComparisonExpr):\n # Step 1: Obtain the types of each operand and whether or not we can\n # narrow their types. (For example, we shouldn't try narrowing the",
|
||||
"diff_context": " if literal(expr) == LITERAL_TYPE:\n vartype = type_map[expr]\n return self.conditional_callable_type_map(expr, vartype)\n elif isinstance(node.callee, RefExpr):\n if node.callee.type_guard is not None:\n # TODO: Follow keyword args or *args, **kwargs\n if node.arg_kinds[0] != nodes.ARG_POS:\n self.fail(\"Type guard requires positional argument\", node)\n return {}, {}\n if literal(expr) == LITERAL_TYPE:\n return {expr: TypeGuardType(node.callee.type_guard)}, {}\n elif isinstance(node, ComparisonExpr):\n # Step 1: Obtain the types of each operand and whether or not we can\n # narrow their types. (For example, we shouldn't try narrowing the",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/checkexpr.py",
|
||||
"language": "python",
|
||||
"before_code": " ret_type=self.object_type(),\n fallback=self.named_type('builtins.function'))\n callee_type = get_proper_type(self.accept(e.callee, type_context, always_allow_any=True))\n if (self.chk.options.disallow_untyped_calls and\n self.chk.in_checked_function() and\n isinstance(callee_type, CallableType)",
|
||||
"after_code": " ret_type=self.object_type(),\n fallback=self.named_type('builtins.function'))\n callee_type = get_proper_type(self.accept(e.callee, type_context, always_allow_any=True))\n if (isinstance(e.callee, RefExpr)\n and isinstance(callee_type, CallableType)\n and callee_type.type_guard is not None):\n # Cache it for find_isinstance_check()\n e.callee.type_guard = callee_type.type_guard\n if (self.chk.options.disallow_untyped_calls and\n self.chk.in_checked_function() and\n isinstance(callee_type, CallableType)",
|
||||
"diff_context": " ret_type=self.object_type(),\n fallback=self.named_type('builtins.function'))\n callee_type = get_proper_type(self.accept(e.callee, type_context, always_allow_any=True))\n if (isinstance(e.callee, RefExpr)\n and isinstance(callee_type, CallableType)\n and callee_type.type_guard is not None):\n # Cache it for find_isinstance_check()\n e.callee.type_guard = callee_type.type_guard\n if (self.chk.options.disallow_untyped_calls and\n self.chk.in_checked_function() and\n isinstance(callee_type, CallableType)",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/checkexpr.py",
|
||||
"language": "python",
|
||||
"before_code": " \"\"\"\n if literal(expr) >= LITERAL_TYPE:\n restriction = self.chk.binder.get(expr)\n # If the current node is deferred, some variables may get Any types that they\n # otherwise wouldn't have. We don't want to narrow down these since it may\n # produce invalid inferred Optional[Any] types, at least.",
|
||||
"after_code": " \"\"\"\n if literal(expr) >= LITERAL_TYPE:\n restriction = self.chk.binder.get(expr)\n # Ignore the error about using get_proper_type().\n if isinstance(restriction, TypeGuardType): # type: ignore[misc]\n # A type guard forces the new type even if it doesn't overlap the old.\n return restriction.type_guard\n # If the current node is deferred, some variables may get Any types that they\n # otherwise wouldn't have. We don't want to narrow down these since it may\n # produce invalid inferred Optional[Any] types, at least.",
|
||||
"diff_context": " \"\"\"\n if literal(expr) >= LITERAL_TYPE:\n restriction = self.chk.binder.get(expr)\n # Ignore the error about using get_proper_type().\n if isinstance(restriction, TypeGuardType): # type: ignore[misc]\n # A type guard forces the new type even if it doesn't overlap the old.\n return restriction.type_guard\n # If the current node is deferred, some variables may get Any types that they\n # otherwise wouldn't have. We don't want to narrow down these since it may\n # produce invalid inferred Optional[Any] types, at least.",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": "\"\"\"",
|
||||
"coding_patterns": [
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/constraints.py",
|
||||
"language": "python",
|
||||
"before_code": " for t, a in zip(template.arg_types, cactual.arg_types):\n # Negate direction due to function argument type contravariance.\n res.extend(infer_constraints(t, a, neg_op(self.direction)))\n res.extend(infer_constraints(template.ret_type, cactual.ret_type,\n self.direction))\n return res\n elif isinstance(self.actual, AnyType):",
|
||||
"after_code": " for t, a in zip(template.arg_types, cactual.arg_types):\n # Negate direction due to function argument type contravariance.\n res.extend(infer_constraints(t, a, neg_op(self.direction)))\n template_ret_type, cactual_ret_type = template.ret_type, cactual.ret_type\n if template.type_guard is not None:\n template_ret_type = template.type_guard\n if cactual.type_guard is not None:\n cactual_ret_type = cactual.type_guard\n res.extend(infer_constraints(template_ret_type, cactual_ret_type,\n self.direction))\n return res\n elif isinstance(self.actual, AnyType):",
|
||||
"diff_context": " for t, a in zip(template.arg_types, cactual.arg_types):\n # Negate direction due to function argument type contravariance.\n res.extend(infer_constraints(t, a, neg_op(self.direction)))\n res.extend(infer_constraints(template.ret_type, cactual.ret_type,\n template_ret_type, cactual_ret_type = template.ret_type, cactual.ret_type\n if template.type_guard is not None:\n template_ret_type = template.type_guard\n if cactual.type_guard is not None:\n cactual_ret_type = cactual.type_guard\n res.extend(infer_constraints(template_ret_type, cactual_ret_type,\n self.direction))\n return res\n elif isinstance(self.actual, AnyType):",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/expandtype.py",
|
||||
"language": "python",
|
||||
"before_code": "\n def visit_callable_type(self, t: CallableType) -> Type:\n return t.copy_modified(arg_types=self.expand_types(t.arg_types),\n ret_type=t.ret_type.accept(self))\n\n def visit_overloaded(self, t: Overloaded) -> Type:\n items = [] # type: List[CallableType]",
|
||||
"after_code": "\n def visit_callable_type(self, t: CallableType) -> Type:\n return t.copy_modified(arg_types=self.expand_types(t.arg_types),\n ret_type=t.ret_type.accept(self),\n type_guard=(t.type_guard.accept(self)\n if t.type_guard is not None else None))\n\n def visit_overloaded(self, t: Overloaded) -> Type:\n items = [] # type: List[CallableType]",
|
||||
"diff_context": "\n def visit_callable_type(self, t: CallableType) -> Type:\n return t.copy_modified(arg_types=self.expand_types(t.arg_types),\n ret_type=t.ret_type.accept(self))\n ret_type=t.ret_type.accept(self),\n type_guard=(t.type_guard.accept(self)\n if t.type_guard is not None else None))\n\n def visit_overloaded(self, t: Overloaded) -> Type:\n items = [] # type: List[CallableType]",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "visit_overloaded",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/fixup.py",
|
||||
"language": "python",
|
||||
"before_code": " for arg in ct.bound_args:\n if arg:\n arg.accept(self)\n\n def visit_overloaded(self, t: Overloaded) -> None:\n for ct in t.items():",
|
||||
"after_code": " for arg in ct.bound_args:\n if arg:\n arg.accept(self)\n if ct.type_guard is not None:\n ct.type_guard.accept(self)\n\n def visit_overloaded(self, t: Overloaded) -> None:\n for ct in t.items():",
|
||||
"diff_context": " for arg in ct.bound_args:\n if arg:\n arg.accept(self)\n if ct.type_guard is not None:\n ct.type_guard.accept(self)\n\n def visit_overloaded(self, t: Overloaded) -> None:\n for ct in t.items():",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "visit_overloaded",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject; references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Add a separate issue form to report crashes (#9549)",
|
||||
"description": "Add a separate issue form to report crashes (#9549)",
|
||||
"url": "https://github.com/python/mypy/commit/cca6e2fdc874b7538bd1d2ef70daab687b2a0363",
|
||||
"date": "2020-10-08T22:30:06Z",
|
||||
"sha_or_number": "cca6e2fdc874b7538bd1d2ef70daab687b2a0363",
|
||||
"files_changed": [
|
||||
".github/ISSUE_TEMPLATE/crash.md"
|
||||
],
|
||||
"additions": 41,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9549"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": ".github/ISSUE_TEMPLATE/crash.md",
|
||||
"language": "markdown",
|
||||
"before_code": "",
|
||||
"after_code": "---\nname: Crash Report\nabout: Crash (traceback or \"INTERNAL ERROR\")\nlabels: \"crash\"\n---\n\n<!--\n Use this form only if mypy reports an \"INTERNAL ERROR\" and/or gives a traceback.\n Please include the traceback and all other messages below (use `mypy --show-traceback`).\n-->\n\n**Crash Report**\n\n(Tell us what happened.)\n\n**Traceback**\n\n```\n(Insert traceback and other messages from mypy here -- use `--show-traceback`.)\n```\n\n**To Reproduce**\n\n(Write what you did to reproduce the crash. Full source code is\nappreciated. We also very much appreciate it if you try to narrow the\nsource down to a small stand-alone example.)\n\n**Your Environment**\n\n<!-- Include as many relevant details about the environment you experienced the bug in -->\n\n- Mypy version used:\n- Mypy command-line flags:\n- Mypy configuration options from `mypy.ini` (and other config files):\n- Python version used:\n- Operating system and version:\n\n<!--\nYou can freely edit this text, please remove all the lines\nyou believe are unnecessary.\n-->",
|
||||
"diff_context": "---\nname: Crash Report\nabout: Crash (traceback or \"INTERNAL ERROR\")\nlabels: \"crash\"\n---\n\n<!--\n Use this form only if mypy reports an \"INTERNAL ERROR\" and/or gives a traceback.\n Please include the traceback and all other messages below (use `mypy --show-traceback`).\n-->\n\n**Crash Report**\n\n(Tell us what happened.)\n\n**Traceback**\n\n```\n(Insert traceback and other messages from mypy here -- use `--show-traceback`.)\n```\n\n**To Reproduce**\n\n(Write what you did to reproduce the crash. Full source code is\nappreciated. We also very much appreciate it if you try to narrow the\nsource down to a small stand-alone example.)\n\n**Your Environment**\n\n<!-- Include as many relevant details about the environment you experienced the bug in -->\n\n- Mypy version used:\n- Mypy command-line flags:\n- Mypy configuration options from `mypy.ini` (and other config files):\n- Python version used:\n- Operating system and version:\n\n<!--\nYou can freely edit this text, please remove all the lines\nyou believe are unnecessary.\n-->",
|
||||
"change_type": "addition",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "imperative_mood; references_issue",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Make the new bug templates less markup-heavy (#9438)",
|
||||
"description": "Make the new bug templates less markup-heavy (#9438)\n\n- Remove emoji\r\n- Instead of `## H2 headings` just use `**bold**`\r\n- Add link to docs\r\n- Add suggestion for new users not to file a bug",
|
||||
"url": "https://github.com/python/mypy/commit/6f07cb6a2e02446b909846f99817f674675e826e",
|
||||
"date": "2020-09-11T18:35:59Z",
|
||||
"sha_or_number": "6f07cb6a2e02446b909846f99817f674675e826e",
|
||||
"files_changed": [
|
||||
".github/ISSUE_TEMPLATE/bug.md",
|
||||
".github/ISSUE_TEMPLATE/documentation.md",
|
||||
".github/ISSUE_TEMPLATE/feature.md",
|
||||
".github/ISSUE_TEMPLATE/question.md"
|
||||
],
|
||||
"additions": 24,
|
||||
"deletions": 18,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9438"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": ".github/ISSUE_TEMPLATE/bug.md",
|
||||
"language": "markdown",
|
||||
"before_code": "---\nname: 🐛 Bug Report\nabout: Submit a bug report\nlabels: \"bug\"\n---\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n## 🐛 Bug Report\n\n(A clear and concise description of what the bug is.)\n\n## To Reproduce\n\n(Write your steps here:)\n\n1. Step 1...\n1. Step 2...\n1. Step 3...\n\n## Expected Behavior\n\n<!--\n How did you expect your project to behave?",
|
||||
"after_code": "---\nname: Bug Report\nabout: Submit a bug report\nlabels: \"bug\"\n---\n\n<!--\n If you're new to mypy and you're not sure whether what you're experiencing is a mypy bug, please see the \"Question and Help\" form\n instead.\n-->\n\n**Bug Report**\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n(A clear and concise description of what the bug is.)\n\n**To Reproduce**\n\n(Write your steps here:)\n\n1. Step 1...\n2. Step 2...\n3. Step 3...\n\n**Expected Behavior**\n\n<!--\n How did you expect your project to behave?",
|
||||
"diff_context": "---\nname: 🐛 Bug Report\nname: Bug Report\nabout: Submit a bug report\nlabels: \"bug\"\n---\n\n<!--\n If you're new to mypy and you're not sure whether what you're experiencing is a mypy bug, please see the \"Question and Help\" form\n instead.\n-->\n\n**Bug Report**\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n## 🐛 Bug Report\n\n(A clear and concise description of what the bug is.)\n\n## To Reproduce\n**To Reproduce**\n\n(Write your steps here:)\n\n1. Step 1...\n1. Step 2...\n1. Step 3...\n2. Step 2...\n3. Step 3...\n\n## Expected Behavior\n**Expected Behavior**\n\n<!--\n How did you expect your project to behave?",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 20,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Add MYPY_CONFIG_FILE_DIR to environment when config file is read (2nd try) (#9414)",
|
||||
"description": "Add MYPY_CONFIG_FILE_DIR to environment when config file is read (2nd try) (#9414)\n\n(This fixes the mistake I introduced in the previous version.)\r\n\r\nResubmit of #9403.\r\n\r\nFixes #7968.\r\n\r\nCo-authored-by: aghast <aghast@aghast.dev>",
|
||||
"url": "https://github.com/python/mypy/commit/9d038469d80e36057c77e0a8a18831f829778f9d",
|
||||
"date": "2020-09-04T20:55:14Z",
|
||||
"sha_or_number": "9d038469d80e36057c77e0a8a18831f829778f9d",
|
||||
"files_changed": [
|
||||
"mypy/config_parser.py",
|
||||
"mypy/test/testcmdline.py",
|
||||
"test-data/unit/envvars.test"
|
||||
],
|
||||
"additions": 15,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9403",
|
||||
"7968",
|
||||
"9414"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "imperative_mood; references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Revert \"Add MYPY_CONFIG_FILE_DIR to environment when config file is read (#9403)\"",
|
||||
"description": "Revert \"Add MYPY_CONFIG_FILE_DIR to environment when config file is read (#9403)\"\n\nReason: This broke CI.\n\nThis reverts commit 652aca96609c876c47ca7eaa68d67ac1e36f4215.",
|
||||
"url": "https://github.com/python/mypy/commit/57d3473ae906fe945953b874d3dcb66efb2710ca",
|
||||
"date": "2020-09-04T02:45:27Z",
|
||||
"sha_or_number": "57d3473ae906fe945953b874d3dcb66efb2710ca",
|
||||
"files_changed": [
|
||||
"mypy/config_parser.py",
|
||||
"mypy/test/testcmdline.py",
|
||||
"test-data/unit/envvars.test"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 15,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9403"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Revert issue template (#9345) -- it doesn't work",
|
||||
"description": "Revert issue template (#9345) -- it doesn't work\n\nThis reverts commit 18c84e0f6906cfb315c367aa35550a4727cb57f8.",
|
||||
"url": "https://github.com/python/mypy/commit/42a522089c6b418727e143c181128e902acf0908",
|
||||
"date": "2020-08-27T22:21:28Z",
|
||||
"sha_or_number": "42a522089c6b418727e143c181128e902acf0908",
|
||||
"files_changed": [
|
||||
".github/ISSUE_TEMPLATE/bug.md",
|
||||
".github/ISSUE_TEMPLATE/documentation.md",
|
||||
".github/ISSUE_TEMPLATE/feature.md",
|
||||
".github/ISSUE_TEMPLATE/question.md",
|
||||
".github/PULL_REQUEST_TEMPLATE.md",
|
||||
"ISSUE_TEMPLATE.md"
|
||||
],
|
||||
"additions": 20,
|
||||
"deletions": 110,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9345"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": ".github/ISSUE_TEMPLATE/bug.md",
|
||||
"language": "markdown",
|
||||
"before_code": "---\nname: 🐛 Bug Report\nlabels: \"bug\"\n---\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n## 🐛 Bug Report\n\n(A clear and concise description of what the bug is.)\n\n## To Reproduce\n\n(Write your steps here:)\n\n1. Step 1...\n1. Step 2...\n1. Step 3...\n\n## Expected Behavior\n\n<!--\n How did you expect your project to behave?\n It’s fine if you’re not sure your understanding is correct.\n Write down what you thought would happen. If you just expected no errors, you can delete this section.\n-->\n\n(Write what you thought would happen.)\n\n## Actual Behavior\n\n<!--\n Did something go wrong?\n Is something broken, or not behaving as you expected?\n-->\n\n(Write what happened.)\n\n## Your Environment\n\n<!-- Include as many relevant details about the environment you experienced the bug in -->\n\n- Mypy version used:\n- Mypy command-line flags:\n- Mypy configuration options from `mypy.ini` (and other config files):\n- Python version used:\n- Operating system and version:\n\n<!--\nYou can freely edit this text, please remove all the lines\nyou believe are unnecessary.\n-->",
|
||||
"after_code": "",
|
||||
"diff_context": "---\nname: 🐛 Bug Report\nlabels: \"bug\"\n---\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n## 🐛 Bug Report\n\n(A clear and concise description of what the bug is.)\n\n## To Reproduce\n\n(Write your steps here:)\n\n1. Step 1...\n1. Step 2...\n1. Step 3...\n\n## Expected Behavior\n\n<!--\n How did you expect your project to behave?\n It’s fine if you’re not sure your understanding is correct.\n Write down what you thought would happen. If you just expected no errors, you can delete this section.\n-->\n\n(Write what you thought would happen.)\n\n## Actual Behavior\n\n<!--\n Did something go wrong?\n Is something broken, or not behaving as you expected?\n-->\n\n(Write what happened.)\n\n## Your Environment\n\n<!-- Include as many relevant details about the environment you experienced the bug in -->\n\n- Mypy version used:\n- Mypy command-line flags:\n- Mypy configuration options from `mypy.ini` (and other config files):\n- Python version used:\n- Operating system and version:\n\n<!--\nYou can freely edit this text, please remove all the lines\nyou believe are unnecessary.\n-->",
|
||||
"change_type": "deletion",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": ".github/ISSUE_TEMPLATE/feature.md",
|
||||
"language": "markdown",
|
||||
"before_code": "---\nname: 🚀 Feature\nlabels: \"feature\"\n---\n\n## 🚀 Feature\n\n(A clear and concise description of your feature proposal.)\n\n## Pitch\n\n(Please explain why this feature should be implemented and how it would be used. Add examples, if applicable.)",
|
||||
"after_code": "",
|
||||
"diff_context": "---\nname: 🚀 Feature\nlabels: \"feature\"\n---\n\n## 🚀 Feature\n\n(A clear and concise description of your feature proposal.)\n\n## Pitch\n\n(Please explain why this feature should be implemented and how it would be used. Add examples, if applicable.)",
|
||||
"change_type": "deletion",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": ".github/PULL_REQUEST_TEMPLATE.md",
|
||||
"language": "markdown",
|
||||
"before_code": "### Have you read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)?\n\n(Once you have, delete this section. If you leave it in, your PR may be closed without action.)\n\n### Description\n\n<!--\nIf this pull request closes or fixes an issue, write Closes #NNN\" or \"Fixes #NNN\" in that exact\nformat.\n-->\n\n(Explain how this PR changes mypy.)\n\n## Test Plan\n\n<!--\nIf this is a documentation change, rebuild the docs (link to instructions) and review the changed pages for markup errors.\nIf this is a code change, include new tests (link to the testing docs). Be sure to run the tests locally and fix any errors before submitting the PR (more instructions).\nIf this change cannot be tested by the CI, please explain how to verify it manually.\n-->\n\n(Write your test plan here. If you changed any code, please provide us with clear instructions on how you verified your changes work.)",
|
||||
"after_code": "",
|
||||
"diff_context": "### Have you read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)?\n\n(Once you have, delete this section. If you leave it in, your PR may be closed without action.)\n\n### Description\n\n<!--\nIf this pull request closes or fixes an issue, write Closes #NNN\" or \"Fixes #NNN\" in that exact\nformat.\n-->\n\n(Explain how this PR changes mypy.)\n\n## Test Plan\n\n<!--\nIf this is a documentation change, rebuild the docs (link to instructions) and review the changed pages for markup errors.\nIf this is a code change, include new tests (link to the testing docs). Be sure to run the tests locally and fix any errors before submitting the PR (more instructions).\nIf this change cannot be tested by the CI, please explain how to verify it manually.\n-->\n\n(Write your test plan here. If you changed any code, please provide us with clear instructions on how you verified your changes work.)",
|
||||
"change_type": "deletion",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject; references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
}
|
||||
]
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
Assistant Guidelines
|
||||
These rules are absolutely imperative to adhere to. Comply with them precisely as they are outlined.
|
||||
|
||||
The agent must use the sequential thinking MCP tool to work out problems.
|
||||
|
||||
Core Behavior Guidelines
|
||||
|
||||
Respond only to explicit requests. Do not add files, code, tests, or comments unless asked.
|
||||
|
||||
Follow instructions precisely. No assumptions or speculative additions.
|
||||
|
||||
Use provided context accurately.
|
||||
|
||||
Avoid extra output. No debugging logs or test harnesses unless requested.
|
||||
|
||||
Produce clean, optimized code when code is requested. Respect existing style.
|
||||
|
||||
Deliver complete, standalone solutions. No placeholders.
|
||||
|
||||
Limit file creation. Only create new files when necessary.
|
||||
|
||||
If you modify the model in a user's code, you must confirm with the user and never be sneaky. Always tell the user exactly what you are doing.
|
||||
|
||||
Communication & Delivery
|
||||
|
||||
9. Don't explain unless asked. Do not expose reasoning in outputs.
|
||||
10. If unsure, say "I don't know." Avoid hallucinated content.
|
||||
11. Maintain consistency across sessions. Refer to project memory and documentation.
|
||||
12. Respect privacy and permissions. Never leak or infer secure data.
|
||||
13. Prioritize targeted edits over full rewrites.
|
||||
14. Optimize incrementally. Avoid unnecessary overhauls.
|
||||
|
||||
Spec.md Requirement
|
||||
|
||||
You must maintain a file named Spec.md. This file acts as the single source of truth for the project.
|
||||
|
||||
Rules:
|
||||
|
||||
Before starting any implementation, check if Spec.md already exists.
|
||||
|
||||
If it does not exist, create one using the template provided below.
|
||||
|
||||
Always update Spec.md before and after any major change.
|
||||
|
||||
Use the contents of Spec.md to guide logic, structure, and implementation decisions.
|
||||
|
||||
When updating a section, condense previous content to keep the document concise.
|
||||
|
||||
Spec.md Starter Template (Plain Text Format)
|
||||
|
||||
Title: Spec.md – Project Specification
|
||||
|
||||
Section: Purpose
|
||||
Describe the main goal of this feature, tool, or system.
|
||||
|
||||
Section: Core Functionality
|
||||
List the key features, expected behaviors, and common use cases.
|
||||
|
||||
Section: Architecture Overview
|
||||
Summarize the technical setup, frameworks used, and main modules or services.
|
||||
|
||||
Section: Input and Output Contracts
|
||||
List all inputs and outputs in a table-like format:
|
||||
|
||||
Input: describe the input data, its format, and where it comes from.
|
||||
|
||||
Output: describe the output data, its format, and its destination.
|
||||
|
||||
Section: Edge Cases and Constraints
|
||||
List known limitations, special scenarios, and fallback behaviors.
|
||||
|
||||
Section: File and Module Map
|
||||
List all important files or modules and describe what each one is responsible for.
|
||||
|
||||
Section: Open Questions or TODOs
|
||||
Create a checklist of unresolved decisions, logic that needs clarification, or tasks that are still pending.
|
||||
|
||||
Section: Last Updated
|
||||
Include the most recent update date and who made the update.
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
# PEP 8 Style Guide: Essentials
|
||||
|
||||
## Code Layout
|
||||
- Indentation: 4 spaces per level
|
||||
- Line length: 79 for code (88/100 acceptable by team), 72 for comments/docstrings
|
||||
- Blank lines: 2 around top-level defs/classes, 1 between methods
|
||||
|
||||
```python
|
||||
# Hanging indent for long calls
|
||||
foo = long_function_name(
|
||||
var_one, var_two,
|
||||
var_three, var_four,
|
||||
)
|
||||
```
|
||||
|
||||
## Imports
|
||||
- One import per line
|
||||
- Group: stdlib, third-party, local
|
||||
- Prefer absolute imports; avoid wildcard imports
|
||||
|
||||
```python
|
||||
import os
|
||||
import sys
|
||||
from subprocess import Popen, PIPE
|
||||
|
||||
import requests
|
||||
|
||||
from myproject.models import User
|
||||
```
|
||||
|
||||
## Whitespace
|
||||
- No space inside brackets or before commas/semicolons
|
||||
- Spaces around binary operators
|
||||
|
||||
```python
|
||||
x = 1
|
||||
hypot2 = x * x + y * y
|
||||
```
|
||||
|
||||
## Naming
|
||||
- snake_case: functions, variables
|
||||
- PascalCase: classes
|
||||
- SCREAMING_SNAKE_CASE: constants
|
||||
|
||||
## Comments & Docstrings
|
||||
- Use complete sentences; keep up to date
|
||||
- Triple-double quotes for public modules, classes, functions
|
||||
```python
|
||||
def f(x: int) -> int:
|
||||
"""Return x doubled."""
|
||||
return x * 2
|
||||
```
|
||||
|
||||
## Type Hints
|
||||
- Space after colon; arrow for returns
|
||||
```python
|
||||
def munge(s: str) -> str: ...
|
||||
```
|
||||
|
||||
## Tooling
|
||||
- Black, isort, Flake8 (or Ruff) to automate style
|
||||
- Example pyproject.toml excerpt:
|
||||
```toml
|
||||
[tool.black]
|
||||
line-length = 88
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
```
|
||||
|
||||
## Common Violations
|
||||
- E501: line too long -> break with parentheses
|
||||
- E225: missing whitespace around operator
|
||||
- E402: module import not at top of file
|
||||
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
# The Zen of Python: Practical Guide
|
||||
|
||||
## Overview
|
||||
The Zen of Python (Tim Peters; run `import this`) captures Python's philosophy. Use these principles as a checklist during design, coding, and reviews.
|
||||
|
||||
## Key Principles With Guidance
|
||||
|
||||
### 1. Beautiful is better than ugly
|
||||
Prefer descriptive names, clear structure, and consistent formatting.
|
||||
|
||||
### 2. Explicit is better than implicit
|
||||
Be clear about behavior, imports, and types.
|
||||
```python
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
def get_future_date(days_ahead: int) -> datetime:
|
||||
return datetime.now() + timedelta(days=days_ahead)
|
||||
```
|
||||
|
||||
### 3. Simple is better than complex
|
||||
Choose straightforward solutions first.
|
||||
|
||||
### 4. Complex is better than complicated
|
||||
When complexity is needed, organize it with clear abstractions.
|
||||
|
||||
### 5. Flat is better than nested
|
||||
Use early returns to reduce indentation.
|
||||
|
||||
### 6. Sparse is better than dense
|
||||
Give code room to breathe with whitespace.
|
||||
|
||||
### 7. Readability counts
|
||||
Optimize for human readers; add docstrings for nontrivial code.
|
||||
|
||||
### 8. Special cases aren't special enough to break the rules
|
||||
Stay consistent; exceptions should be rare and justified.
|
||||
|
||||
### 9. Although practicality beats purity
|
||||
Prefer practical solutions that teams can maintain.
|
||||
|
||||
### 10. Errors should never pass silently
|
||||
Handle exceptions explicitly; log with context.
|
||||
|
||||
### 11. Unless explicitly silenced
|
||||
Silence only specific, acceptable errors and document why.
|
||||
|
||||
### 12. In the face of ambiguity, refuse the temptation to guess
|
||||
Require explicit inputs and behavior.
|
||||
|
||||
### 13. There should be one obvious way to do it
|
||||
Prefer standard library patterns and idioms.
|
||||
|
||||
### 14. Although that way may not be obvious at first
|
||||
Learn Python idioms; embrace clarity over novelty.
|
||||
|
||||
### 15. Now is better than never; 16. Never is often better than right now
|
||||
Iterate, but don't rush broken code.
|
||||
|
||||
### 17/18. Hard to explain is bad; easy to explain is good
|
||||
Prefer designs you can explain simply.
|
||||
|
||||
### 19. Namespaces are one honking great idea
|
||||
Use modules/packages to separate concerns; avoid wildcard imports.
|
||||
|
||||
## Modern Python Tie-ins
|
||||
- Type hints reinforce explicitness
|
||||
- Context managers enforce safe resource handling
|
||||
- Dataclasses improve readability for data containers
|
||||
|
||||
## Quick Review Checklist
|
||||
- Is it readable and explicit?
|
||||
- Is this the simplest working solution?
|
||||
- Are errors explicit and logged?
|
||||
- Are modules/namespaces used appropriately?
|
||||
|
||||
|
|
@ -1,10 +1,11 @@
|
|||
from typing import Any, Optional
|
||||
|
||||
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
||||
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
||||
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
||||
from datetime import datetime, timezone
|
||||
|
||||
logger = get_logger("ChunksRetriever")
|
||||
|
||||
|
|
@ -30,18 +31,13 @@ class ChunksRetriever(BaseRetriever):
|
|||
async def get_context(self, query: str) -> Any:
|
||||
"""
|
||||
Retrieves document chunks context based on the query.
|
||||
|
||||
Searches for document chunks relevant to the specified query using a vector engine.
|
||||
Raises a NoDataError if no data is found in the system.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- query (str): The query string to search for relevant document chunks.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
- Any: A list of document chunk payloads retrieved from the search.
|
||||
"""
|
||||
logger.info(
|
||||
|
|
@ -53,6 +49,8 @@ class ChunksRetriever(BaseRetriever):
|
|||
try:
|
||||
found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=self.top_k)
|
||||
logger.info(f"Found {len(found_chunks)} chunks from vector search")
|
||||
await update_node_access_timestamps(found_chunks)
|
||||
|
||||
except CollectionNotFoundError as error:
|
||||
logger.error("DocumentChunk_text collection not found in vector database")
|
||||
raise NoDataError("No data found in the system, please add data first.") from error
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from cognee.modules.retrieval.utils.session_cache import (
|
|||
save_conversation_history,
|
||||
get_conversation_history,
|
||||
)
|
||||
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
||||
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
||||
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
||||
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
||||
|
|
@ -65,7 +66,7 @@ class CompletionRetriever(BaseRetriever):
|
|||
|
||||
if len(found_chunks) == 0:
|
||||
return ""
|
||||
|
||||
await update_node_access_timestamps(found_chunks)
|
||||
# Combine all chunk texts returned from the vector search (the number of chunks is determined by top_k)
|
||||
chunks_payload = [found_chunk.payload["text"] for found_chunk in found_chunks]
|
||||
combined_context = "\n".join(chunks_payload)
|
||||
|
|
|
|||
|
|
@ -16,11 +16,13 @@ from cognee.modules.retrieval.utils.session_cache import (
|
|||
)
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.retrieval.utils.extract_uuid_from_node import extract_uuid_from_node
|
||||
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
||||
from cognee.modules.retrieval.utils.models import CogneeUserInteraction
|
||||
from cognee.modules.engine.models.node_set import NodeSet
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.context_global_variables import session_user
|
||||
from cognee.infrastructure.databases.cache.config import CacheConfig
|
||||
from cognee.modules.graph.utils import get_entity_nodes_from_triplets
|
||||
|
||||
logger = get_logger("GraphCompletionRetriever")
|
||||
|
||||
|
|
@ -145,6 +147,9 @@ class GraphCompletionRetriever(BaseGraphRetriever):
|
|||
|
||||
# context = await self.resolve_edges_to_text(triplets)
|
||||
|
||||
entity_nodes = get_entity_nodes_from_triplets(triplets)
|
||||
|
||||
await update_node_access_timestamps(entity_nodes)
|
||||
return triplets
|
||||
|
||||
async def convert_retrieved_objects_to_context(self, triplets: List[Edge]):
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from cognee.shared.logging_utils import get_logger
|
|||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
||||
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
||||
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
||||
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
||||
|
||||
logger = get_logger("SummariesRetriever")
|
||||
|
|
@ -54,6 +55,9 @@ class SummariesRetriever(BaseRetriever):
|
|||
"TextSummary_text", query, limit=self.top_k
|
||||
)
|
||||
logger.info(f"Found {len(summaries_results)} summaries from vector search")
|
||||
|
||||
await update_node_access_timestamps(summaries_results)
|
||||
|
||||
except CollectionNotFoundError as error:
|
||||
logger.error("TextSummary_text collection not found in vector database")
|
||||
raise NoDataError("No data found in the system, please add data first.") from error
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class TripletRetriever(BaseRetriever):
|
|||
"""Initialize retriever with optional custom prompt paths."""
|
||||
self.user_prompt_path = user_prompt_path
|
||||
self.system_prompt_path = system_prompt_path
|
||||
self.top_k = top_k if top_k is not None else 1
|
||||
self.top_k = top_k if top_k is not None else 5
|
||||
self.system_prompt = system_prompt
|
||||
|
||||
async def get_context(self, query: str) -> str:
|
||||
|
|
|
|||
88
cognee/modules/retrieval/utils/access_tracking.py
Normal file
88
cognee/modules/retrieval/utils/access_tracking.py
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
"""Utilities for tracking data access in retrievers."""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import List, Any
|
||||
from uuid import UUID
|
||||
import os
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.modules.data.models import Data
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from sqlalchemy import update
|
||||
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def update_node_access_timestamps(items: List[Any]):
    """Best-effort update of ``last_accessed`` for the documents behind *items*.

    Gated by the ``ENABLE_LAST_ACCESSED`` environment variable (must be the
    string "true", case-insensitive); otherwise this is a no-op. Also a no-op
    for empty input or when no item carries an ``id``.

    Parameters:
    - items: search results; each item is either an object exposing a
      ``payload`` dict or a plain dict, from which the ``id`` key is read.

    Raises:
    - Exception: any failure from the graph projection or the SQL update is
      logged (with traceback) and re-raised for the caller to handle.
    """
    if os.getenv("ENABLE_LAST_ACCESSED", "false").lower() != "true":
        return

    if not items:
        return

    # Extract node IDs first so we can bail out cheaply when nothing is
    # trackable — avoids acquiring the graph engine for no work.
    node_ids = []
    for item in items:
        item_id = item.payload.get("id") if hasattr(item, "payload") else item.get("id")
        if item_id:
            node_ids.append(str(item_id))

    if not node_ids:
        return

    graph_engine = await get_graph_engine()
    # Timezone-aware timestamp; written to the relational Data table below.
    timestamp_dt = datetime.now(timezone.utc)

    # Focus on document-level tracking via projection
    try:
        doc_ids = await _find_origin_documents_via_projection(graph_engine, node_ids)
        if doc_ids:
            await _update_sql_records(doc_ids, timestamp_dt)
    except Exception:
        # logger.exception records the full traceback; the error is still
        # propagated so callers can decide how to react.
        logger.exception("Failed to update SQL timestamps")
        raise
|
||||
|
||||
|
||||
async def _find_origin_documents_via_projection(graph_engine, node_ids):
    """Find origin documents using graph projection instead of DB queries.

    Projects the graph into memory once, then walks the skeleton edges of
    every node in *node_ids* whose ``type`` is ``DocumentChunk``, collecting
    the ids of neighbouring ``TextDocument``/``Document`` nodes.

    Parameters:
    - graph_engine: graph database adapter handed to
      ``CogneeGraph.project_graph_from_db``.
    - node_ids: list of node id strings to resolve to their documents.

    Returns:
    - list of unique document node ids (the type of ``neighbor.id`` is
      whatever CogneeGraph exposes — presumably str; confirm with the adapter).
    """
    # Project the entire graph with necessary properties
    # NOTE(review): this projects the WHOLE graph on every call, which may be
    # expensive on large graphs — confirm whether a scoped projection exists.
    memory_fragment = CogneeGraph()
    await memory_fragment.project_graph_from_db(
        graph_engine,
        node_properties_to_project=["id", "type"],
        edge_properties_to_project=["relationship_name"],
    )

    # Find origin documents by traversing the in-memory graph
    doc_ids = set()
    for node_id in node_ids:
        node = memory_fragment.get_node(node_id)
        # Only chunk nodes are traced back to an origin document.
        if node and node.get_attribute("type") == "DocumentChunk":
            # Traverse edges to find connected documents
            for edge in node.get_skeleton_edges():
                # Get the neighbor node (the edge may point in either
                # direction relative to the chunk).
                # NOTE(review): compares node ``.id`` against the str
                # ``node_id`` — assumes ids are stored as strings; verify.
                neighbor = (
                    edge.get_destination_node()
                    if edge.get_source_node().id == node_id
                    else edge.get_source_node()
                )
                if neighbor and neighbor.get_attribute("type") in ["TextDocument", "Document"]:
                    doc_ids.add(neighbor.id)

    return list(doc_ids)
|
||||
|
||||
|
||||
async def _update_sql_records(doc_ids, timestamp_dt):
    """Set ``last_accessed`` on the matching rows of the SQL ``Data`` table.

    Provider-agnostic: issues a single bulk UPDATE through the relational
    engine's async session and commits it.

    Parameters:
    - doc_ids: iterable of document id strings (parseable as UUIDs).
    - timestamp_dt: the datetime to store in ``last_accessed``.
    """
    engine = get_relational_engine()
    target_ids = [UUID(doc_id) for doc_id in doc_ids]

    async with engine.get_async_session() as session:
        bulk_update = update(Data).where(Data.id.in_(target_ids)).values(
            last_accessed=timestamp_dt
        )
        await session.execute(bulk_update)
        await session.commit()
|
||||
|
|
@ -16,24 +16,6 @@ logger = get_logger(level=ERROR)
|
|||
|
||||
|
||||
def format_triplets(edges):
|
||||
print("\n\n\n")
|
||||
|
||||
def filter_attributes(obj, attributes):
|
||||
"""Helper function to filter out non-None properties, including nested dicts."""
|
||||
result = {}
|
||||
for attr in attributes:
|
||||
value = getattr(obj, attr, None)
|
||||
if value is not None:
|
||||
# If the value is a dict, extract relevant keys from it
|
||||
if isinstance(value, dict):
|
||||
nested_values = {
|
||||
k: v for k, v in value.items() if k in attributes and v is not None
|
||||
}
|
||||
result[attr] = nested_values
|
||||
else:
|
||||
result[attr] = value
|
||||
return result
|
||||
|
||||
triplets = []
|
||||
for edge in edges:
|
||||
node1 = edge.node1
|
||||
|
|
|
|||
|
|
@ -1,18 +1,9 @@
|
|||
from uuid import UUID, uuid4
|
||||
from fastapi_users.exceptions import UserAlreadyExists
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.modules.notebooks.models.Notebook import Notebook
|
||||
from cognee.modules.notebooks.methods.create_notebook import _create_tutorial_notebook
|
||||
from cognee.modules.users.exceptions import TenantNotFoundError
|
||||
from cognee.modules.users.get_user_manager import get_user_manager_context
|
||||
from cognee.modules.users.get_user_db import get_user_db_context
|
||||
from cognee.modules.users.models.User import UserCreate
|
||||
from cognee.modules.users.models.Tenant import Tenant
|
||||
|
||||
from sqlalchemy import select
|
||||
from typing import Optional
|
||||
|
||||
|
||||
async def create_user(
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue