Merge branch 'dev' into check-permissions-on-dataset-fix

This commit is contained in:
Igor Ilic 2025-06-27 15:01:12 +02:00 committed by GitHub
commit 3d7318e9ab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 3926 additions and 3874 deletions

View file

@ -99,7 +99,7 @@ jobs:
- name: Install Kuzu extra
run: |
poetry install -E kuzu
poetry install
- name: Run Kuzu Example
env:
@ -188,7 +188,7 @@ jobs:
defaults:
run:
shell: bash
steps:
- name: Check out
uses: actions/checkout@master
@ -265,5 +265,3 @@ jobs:
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: |
poetry run python examples/database_examples/pgvector_example.py

View file

@ -234,7 +234,7 @@ jobs:
- name: Install specific graph db dependency
run: |
poetry install -E kuzu
poetry install
- name: Run parallel databases test
env:

View file

@ -35,7 +35,7 @@ jobs:
- name: Install specific db dependency
run: |
poetry install -E kuzu
poetry install
- name: Run Kuzu Tests
env:

View file

@ -123,7 +123,7 @@ jobs:
- name: Install specific db dependency
run: |
poetry install -E postgres -E kuzu
poetry install -E postgres
- name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql)
env:

View file

@ -35,7 +35,7 @@ jobs:
- name: Install specific db dependency
run: |
poetry install -E kuzu
poetry install
- name: Run Kuzu search Tests
env:
@ -136,7 +136,7 @@ jobs:
python-version: ${{ inputs.python-version }}
- name: Install dependencies
run: poetry install -E kuzu -E postgres
run: poetry install -E postgres
- name: Run Kuzu/PGVector/Postgres Tests
env:

View file

@ -31,7 +31,7 @@ COPY README.md pyproject.toml uv.lock entrypoint.sh ./
# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra kuzu --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
# Copy Alembic configuration
COPY alembic.ini /app/alembic.ini
@ -41,7 +41,7 @@ COPY alembic/ /app/alembic
# Installing separately from its dependencies allows optimal layer caching
COPY ./cognee /app/cognee
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra kuzu --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
FROM python:3.12-slim-bookworm

View file

@ -32,6 +32,8 @@
<a href="https://trendshift.io/repositories/13955" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13955" alt="topoteretes%2Fcognee | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
**🚀 We are launching Cognee SaaS: Sign up [here](https://www.cognee.ai/waitlist) for the hosted beta!**
Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Extract, Cognify, Load) pipelines.
More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github.com/topoteretes/cognee/tree/main/evals)

View file

@ -22,7 +22,8 @@ class config:
LocalStorage.ensure_directory_exists(databases_directory_path)
graph_config = get_graph_config()
graph_config.graph_file_path = os.path.join(databases_directory_path, "cognee.graph")
graph_file_name = graph_config.graph_filename
graph_config.graph_file_path = os.path.join(databases_directory_path, graph_file_name)
vector_config = get_vectordb_config()
if vector_config.vector_db_provider == "lancedb":

View file

@ -29,8 +29,8 @@ class GraphConfig(BaseSettings):
- model_config
"""
graph_filename: str = "cognee_graph.pkl"
graph_database_provider: str = "NETWORKX"
graph_filename: str = "cognee_graph"
graph_database_provider: str = "kuzu"
graph_database_url: str = ""
graph_database_username: str = ""
graph_database_password: str = ""

View file

@ -1421,16 +1421,25 @@ class KuzuAdapter(GraphDBInterface):
async def delete_graph(self) -> None:
"""
Delete all data from the graph while preserving the database structure.
Delete all data from the graph directory.
This method removes all nodes and relationships from the graph but maintains the
underlying database for future use. It raises exceptions for failures occurring during
deletion processes.
This method deletes all nodes and relationships from the graph directory.
It raises exceptions for failures occurring during deletion processes.
"""
try:
# Use DETACH DELETE to remove both nodes and their relationships in one operation
await self.query("MATCH (n:Node) DETACH DELETE n")
logger.info("Cleared all data from graph while preserving structure")
if self.connection:
self.connection = None
if self.db:
self.db.close()
self.db = None
if os.path.exists(self.db_path):
shutil.rmtree(self.db_path)
logger.info(f"Deleted Kuzu database files at {self.db_path}")
except Exception as e:
logger.error(f"Failed to delete graph data: {e}")
raise

View file

@ -96,7 +96,9 @@ async def main():
from cognee.infrastructure.databases.graph import get_graph_config
graph_config = get_graph_config()
assert not os.path.exists(graph_config.graph_file_path), "Networkx graph database is not empty"
assert not os.path.exists(graph_config.graph_file_path) or not os.listdir(
graph_config.graph_file_path
), "Kuzu graph directory is not empty"
if __name__ == "__main__":

View file

@ -19,15 +19,14 @@ async def main():
await cognee.cognify()
graph_engine = await get_graph_engine()
graph = await graph_engine.get_graph()
graph = await graph_engine.get_graph_data()
type_counts = Counter(
node_data["type"] for _, node_data in graph.nodes(data=True) if "type" in node_data
)
type_counts = Counter(node_data[1].get("type", {}) for node_data in graph[0])
edge_type_counts = Counter(edge_type for _, _, edge_type in graph.edges(keys=True))
edge_type_counts = Counter(edge_type[2] for edge_type in graph[1])
logging.info(type_counts)
logging.info(edge_type_counts)
# Assert there is exactly one PdfDocument.
assert type_counts.get("PdfDocument", 0) == 1, (

View file

@ -165,11 +165,56 @@ class TestInsightsRetriever:
await graph_engine.add_edges(
[
(person1.id, company1.id, "works_for"),
(person2.id, company2.id, "works_for"),
(person3.id, company3.id, "works_for"),
(person4.id, company1.id, "works_for"),
(person5.id, company1.id, "works_for"),
(
(str)(person1.id),
(str)(company1.id),
"works_for",
dict(
relationship_name="works_for",
source_node_id=person1.id,
target_node_id=company1.id,
),
),
(
(str)(person2.id),
(str)(company2.id),
"works_for",
dict(
relationship_name="works_for",
source_node_id=person2.id,
target_node_id=company2.id,
),
),
(
(str)(person3.id),
(str)(company3.id),
"works_for",
dict(
relationship_name="works_for",
source_node_id=person3.id,
target_node_id=company3.id,
),
),
(
(str)(person4.id),
(str)(company1.id),
"works_for",
dict(
relationship_name="works_for",
source_node_id=person4.id,
target_node_id=company1.id,
),
),
(
(str)(person5.id),
(str)(company1.id),
"works_for",
dict(
relationship_name="works_for",
source_node_id=person5.id,
target_node_id=company1.id,
),
),
]
)

11
poetry.lock generated
View file

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
[[package]]
name = "aiobotocore"
@ -4395,10 +4395,9 @@ files = [
name = "kuzu"
version = "0.9.0"
description = "Highly scalable, extremely fast, easy-to-use embeddable graph database"
optional = true
optional = false
python-versions = "*"
groups = ["main"]
markers = "extra == \"api\" or extra == \"kuzu\""
files = [
{file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec9f216d67c092ea52086c99cf4b1deabe0f8daaf47c80cf1892b3b41c57d58a"},
{file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:bda6d845bf1c7da204ffa7730573118f2d43fe6b14b1a5d0d2845ec3d3481362"},
@ -7613,6 +7612,7 @@ files = [
{file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"},
{file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"},
{file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"},
{file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"},
{file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"},
{file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"},
{file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"},
@ -12027,7 +12027,7 @@ cffi = ["cffi (>=1.11)"]
[extras]
anthropic = ["anthropic"]
api = ["gunicorn", "kuzu", "uvicorn", "websockets"]
api = ["gunicorn", "uvicorn", "websockets"]
aws = ["s3fs"]
chromadb = ["chromadb", "pypika"]
codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
@ -12042,7 +12042,6 @@ graphiti = ["graphiti-core"]
groq = ["groq"]
gui = ["pyside6", "qasync"]
huggingface = ["transformers"]
kuzu = ["kuzu"]
langchain = ["langchain_text_splitters", "langsmith"]
llama-index = ["llama-index-core"]
milvus = ["pymilvus"]
@ -12059,4 +12058,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<=3.13"
content-hash = "a7477a8e1c24cf477043625c926c7ebacd0b4dc66e76630e900f703a93c90986"
content-hash = "de20ef3dd7b184575facc5954017e974ad0171d628ec87dc852391c0d7b2b38b"

View file

@ -55,14 +55,14 @@ dependencies = [
"structlog>=25.2.0,<26",
"onnxruntime<=1.21.1",
"pylance==0.22.0",
"kuzu==0.9.0"
]
[project.optional-dependencies]
api = [
"uvicorn==0.34.0",
"gunicorn>=20.1.0,<24",
"websockets>=15.0.1",
"kuzu==0.9.0",
"websockets>=15.0.1"
]
weaviate = ["weaviate-client==4.9.6"]
qdrant = ["qdrant-client>=1.14.2,<2"]
@ -92,7 +92,6 @@ anthropic = ["anthropic>=0.26.1,<0.27"]
deepeval = ["deepeval>=2.0.1,<3"]
posthog = ["posthog>=3.5.0,<4"]
falkordb = ["falkordb==1.0.9"]
kuzu = ["kuzu==0.9.0"]
groq = ["groq==0.8.0"]
milvus = ["pymilvus>=2.5.0,<3"]
chromadb = [

7670
uv.lock generated

File diff suppressed because it is too large Load diff