From 10ece0638f564d6ca92dd9f84daf0f0041822733 Mon Sep 17 00:00:00 2001
From: vasilije
Date: Wed, 27 Aug 2025 17:33:59 +0200
Subject: [PATCH 01/25] clean up poetry

---
 cognee/infrastructure/llm/config.py           | 35 +++++++++++--------
 .../baml_src/extraction/extract_summary.py    |  6 ++++
 .../knowledge_graph/extract_content_graph.py  |  3 ++
 pyproject.toml                                | 15 ++++----
 4 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py
index de2e2168e..1ff8f6433 100644
--- a/cognee/infrastructure/llm/config.py
+++ b/cognee/infrastructure/llm/config.py
@@ -3,7 +3,10 @@ from typing import Optional, ClassVar
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from pydantic import model_validator
-from baml_py import ClientRegistry
+try:
+    from baml_py import ClientRegistry
+except ImportError:
+    ClientRegistry = None
 
 
 class LLMConfig(BaseSettings):
@@ -63,25 +66,27 @@ class LLMConfig(BaseSettings):
     fallback_endpoint: str = ""
     fallback_model: str = ""
 
-    baml_registry: ClassVar[ClientRegistry] = ClientRegistry()
+    baml_registry: ClassVar = None
 
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
 
     def model_post_init(self, __context) -> None:
         """Initialize the BAML registry after the model is created."""
-        self.baml_registry.add_llm_client(
-            name=self.baml_llm_provider,
-            provider=self.baml_llm_provider,
-            options={
-                "model": self.baml_llm_model,
-                "temperature": self.baml_llm_temperature,
-                "api_key": self.baml_llm_api_key,
-                "base_url": self.baml_llm_endpoint,
-                "api_version": self.baml_llm_api_version,
-            },
-        )
-        # Sets the primary client
-        self.baml_registry.set_primary(self.baml_llm_provider)
+        if ClientRegistry is not None:
+            self.baml_registry = ClientRegistry()
+            self.baml_registry.add_llm_client(
+                name=self.baml_llm_provider,
+                provider=self.baml_llm_provider,
+                options={
+                    "model": self.baml_llm_model,
+                    "temperature": self.baml_llm_temperature,
+                    "api_key": self.baml_llm_api_key,
+                    "base_url": self.baml_llm_endpoint,
+                    "api_version": self.baml_llm_api_version,
+                },
+            )
+            # Sets the primary client
+            self.baml_registry.set_primary(self.baml_llm_provider)
 
     @model_validator(mode="after")
     def ensure_env_vars_for_ollama(self) -> "LLMConfig":
diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py
index 697a52a45..8caabd937 100644
--- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py
+++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py
@@ -37,6 +37,9 @@ async def extract_summary(content: str, response_model: Type[BaseModel]):
     """
     config = get_llm_config()
 
+    if config.baml_registry is None:
+        raise ImportError("BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features.")
+
     # Use BAML's SummarizeContent function
     summary_result = await b.SummarizeContent(
         content, baml_options={"client_registry": config.baml_registry}
@@ -77,6 +80,9 @@ async def extract_code_summary(content: str):
     try:
         config = get_llm_config()
 
+        if config.baml_registry is None:
+            raise ImportError("BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features.")
+
         result = await b.SummarizeCode(
             content, baml_options={"client_registry": config.baml_registry}
         )
diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py
index abff07e09..aa4fce637 100644
--- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py
+++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py
@@ -16,6 +16,9 @@ async def extract_content_graph(
 
     get_logger(level="INFO")
 
+    if config.baml_registry is None:
+        raise ImportError("BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features.")
+
     # if response_model:
     #     # tb = TypeBuilder()
     #     # country = tb.union \
diff --git a/pyproject.toml b/pyproject.toml
index 272c8e929..d4ffedf5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,9 +28,7 @@ dependencies = [
     "nltk>=3.9.1,<4.0.0",
     "numpy>=1.26.4, <=4.0.0",
     "pandas>=2.2.2,<3.0.0",
-    # Note: New s3fs and boto3 versions don't work well together
-    # Always use comaptible fixed versions of these two dependencies
-    "s3fs[boto3]==2025.3.2",
+
     "sqlalchemy>=2.0.39,<3.0.0",
     "aiosqlite>=0.20.0,<1.0.0",
     "tiktoken>=0.8.0,<1.0.0",
@@ -53,10 +51,10 @@ dependencies = [
     "fastapi>=0.115.7,<1.0.0",
     "python-multipart>=0.0.20,<1.0.0",
     "fastapi-users[sqlalchemy]>=14.0.1,<15.0.0",
-    "dlt[sqlalchemy]>=1.9.0,<2",
+
     "sentry-sdk[fastapi]>=2.9.0,<3",
     "structlog>=25.2.0,<26",
-    "baml-py (>=0.201.0,<0.202.0)",
+
     "pympler>=1.1,<2.0.0",
     "onnxruntime>=1.0.0,<2.0.0",
     "pylance>=0.22.0,<1.0.0",
@@ -116,14 +114,13 @@ evals = [
     "plotly>=6.0.0,<7",
     "gdown>=5.2.0,<6",
 ]
-gui = [
-    "pyside6>=6.8.3,<7",
-    "qasync>=0.27.1,<0.28",
-]
+
 graphiti = ["graphiti-core>=0.7.0,<0.8"]
 # Note: New s3fs and boto3 versions don't work well together
 # Always use comaptible fixed versions of these two dependencies
 aws = ["s3fs[boto3]==2025.3.2"]
+dlt = ["dlt[sqlalchemy]>=1.9.0,<2"]
+baml = ["baml-py (>=0.201.0,<0.202.0)"]
 dev = [
     "pytest>=7.4.0,<8",
     "pytest-cov>=6.1.1,<7.0.0",

From 2413b7272b172121e679fd2e6dc47739006f1c10 Mon Sep 17 00:00:00 2001
From: vasilije
Date: Wed, 27 Aug 2025 17:34:55 +0200
Subject: [PATCH 02/25] clean up poetry

---
 cognee/infrastructure/llm/config.py                     | 1 +
 .../baml/baml_src/extraction/extract_summary.py         | 8 ++++++--
 .../extraction/knowledge_graph/extract_content_graph.py | 4 +++-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py
index 1ff8f6433..b2a058ca6 100644
--- a/cognee/infrastructure/llm/config.py
+++ b/cognee/infrastructure/llm/config.py
@@ -3,6 +3,7 @@ from typing import Optional, ClassVar
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from pydantic import model_validator
+
 try:
     from baml_py import ClientRegistry
 except ImportError:
diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py
index 8caabd937..89889d294 100644
--- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py
+++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py
@@ -38,7 +38,9 @@ async def extract_summary(content: str, response_model: Type[BaseModel]):
     config = get_llm_config()
 
     if config.baml_registry is None:
-        raise ImportError("BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features.")
+        raise ImportError(
+            "BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
+        )
 
     # Use BAML's SummarizeContent function
     summary_result = await b.SummarizeContent(
@@ -81,7 +83,9 @@ async def extract_code_summary(content: str):
         config = get_llm_config()
 
         if config.baml_registry is None:
-            raise ImportError("BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features.")
+            raise ImportError(
+                "BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
+            )
 
         result = await b.SummarizeCode(
             content, baml_options={"client_registry": config.baml_registry}
diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py
index aa4fce637..f87d87d1b 100644
--- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py
+++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py
@@ -17,7 +17,9 @@ async def extract_content_graph(
     get_logger(level="INFO")
 
     if config.baml_registry is None:
-        raise ImportError("BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features.")
+        raise ImportError(
+            "BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features."
+        )
 
     # if response_model:
     #     # tb = TypeBuilder()

From 2e07c6cbc10f09147fc2cfdd705b8e38cb3ae070 Mon Sep 17 00:00:00 2001
From: vasilije
Date: Wed, 27 Aug 2025 17:47:51 +0200
Subject: [PATCH 03/25] fixes to postgres issue

---
 .../relational/create_relational_engine.py | 13 +++++++++---
 .../vector/pgvector/PGVectorAdapter.py     | 21 +++++++++++++++----
 pyproject.toml                             |  9 +++++---
 3 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/cognee/infrastructure/databases/relational/create_relational_engine.py b/cognee/infrastructure/databases/relational/create_relational_engine.py
index a889e1758..d38c506ad 100644
--- a/cognee/infrastructure/databases/relational/create_relational_engine.py
+++ b/cognee/infrastructure/databases/relational/create_relational_engine.py
@@ -39,8 +39,15 @@ def create_relational_engine(
         connection_string = f"sqlite+aiosqlite:///{db_path}/{db_name}"
 
     if db_provider == "postgres":
-        connection_string = (
-            f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
-        )
+        try:
+            # Test if asyncpg is available
+            import asyncpg
+            connection_string = (
+                f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
+            )
+        except ImportError:
+            raise ImportError(
+                "PostgreSQL dependencies are not installed. Please install with 'pip install cognee[postgres]' or 'pip install cognee[postgres-binary]' to use PostgreSQL functionality."
+            )
 
     return SQLAlchemyAdapter(connection_string)
diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
index 4dfd9792f..5d8fd3ae3 100644
--- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
+++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
@@ -7,7 +7,16 @@ from sqlalchemy import JSON, Column, Table, select, delete, MetaData
 from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
 from sqlalchemy.exc import ProgrammingError
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
-from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError
+try:
+    from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError
+except ImportError:
+    # PostgreSQL dependencies not installed, define dummy exceptions
+    class DeadlockDetectedError(Exception):
+        pass
+    class DuplicateTableError(Exception):
+        pass
+    class UniqueViolationError(Exception):
+        pass
 
 from cognee.shared.logging_utils import get_logger
 
@@ -70,9 +79,13 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
 
         # Has to be imported at class level
        # Functions reading tables from database need to know what a Vector column type is
-        from pgvector.sqlalchemy import Vector
-
-        self.Vector = Vector
+        try:
+            from pgvector.sqlalchemy import Vector
+            self.Vector = Vector
+        except ImportError:
+            raise ImportError(
+                "PostgreSQL dependencies are not installed. Please install with 'pip install cognee[postgres]' or 'pip install cognee[postgres-binary]' to use PGVector functionality."
+            )
 
     async def embed_data(self, data: list[str]) -> list[list[float]]:
         """
diff --git a/pyproject.toml b/pyproject.toml
index d4ffedf5a..0d076e2e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,15 +72,18 @@ distributed = [
     "modal>=1.0.5,<2.0.0",
 ]
 
+# Database backends
 neo4j = ["neo4j>=5.28.0,<6"]
 neptune = ["langchain_aws>=0.2.22"]
+# PostgreSQL support (binary - no compilation required)
 postgres = [
-    "psycopg2>=2.9.10,<3",
+    "psycopg2-binary>=2.9.10,<3.0.0",  # Pre-compiled binary, no PostgreSQL headers needed
     "pgvector>=0.3.5,<0.4",
     "asyncpg>=0.30.0,<1.0.0",
 ]
-postgres-binary = [
-    "psycopg2-binary>=2.9.10,<3.0.0",
+# PostgreSQL support (source - requires PostgreSQL development headers)
+postgres-source = [
+    "psycopg2>=2.9.10,<3 ; platform_system != 'Windows'",  # Requires libpq-dev, build tools
     "pgvector>=0.3.5,<0.4",
     "asyncpg>=0.30.0,<1.0.0",
 ]

From 5bc4a6cc0286bf5263f6d0f9174b4bab96e1d6ae Mon Sep 17 00:00:00 2001
From: vasilije
Date: Wed, 27 Aug 2025 17:57:39 +0200
Subject: [PATCH 04/25] fixes to postgres issue

---
 .../relational/create_relational_engine.py |  1 +
 .../vector/pgvector/PGVectorAdapter.py     |  4 ++++
 .../files/utils/open_data_file.py          | 21 +++++++------------
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/cognee/infrastructure/databases/relational/create_relational_engine.py b/cognee/infrastructure/databases/relational/create_relational_engine.py
index d38c506ad..4f117bf4c 100644
--- a/cognee/infrastructure/databases/relational/create_relational_engine.py
+++ b/cognee/infrastructure/databases/relational/create_relational_engine.py
@@ -42,6 +42,7 @@ def create_relational_engine(
         try:
             # Test if asyncpg is available
             import asyncpg
+
             connection_string = (
                 f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
             )
         except ImportError:
diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
index 5d8fd3ae3..b2e2bf8c7 100644
--- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
+++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
@@ -7,14 +7,17 @@ from sqlalchemy import JSON, Column, Table, select, delete, MetaData
 from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
 from sqlalchemy.exc import ProgrammingError
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
+
 try:
     from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError
 except ImportError:
     # PostgreSQL dependencies not installed, define dummy exceptions
     class DeadlockDetectedError(Exception):
         pass
+
     class DuplicateTableError(Exception):
         pass
+
     class UniqueViolationError(Exception):
         pass
 
@@ -81,6 +84,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
         # Functions reading tables from database need to know what a Vector column type is
         try:
             from pgvector.sqlalchemy import Vector
+
             self.Vector = Vector
         except ImportError:
             raise ImportError(
diff --git a/cognee/infrastructure/files/utils/open_data_file.py b/cognee/infrastructure/files/utils/open_data_file.py
index 171f5deb7..fcfca4161 100644
--- a/cognee/infrastructure/files/utils/open_data_file.py
+++ b/cognee/infrastructure/files/utils/open_data_file.py
@@ -4,7 +4,6 @@ from urllib.parse import urlparse
 from contextlib import asynccontextmanager
 
 from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
-from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage
 from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage
 
 
@@ -23,23 +22,17 @@ async def open_data_file(file_path: str, mode: str = "rb", encoding: str = None,
             yield file
 
     elif file_path.startswith("s3://"):
+        try:
+            from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage
+        except ImportError:
+            raise ImportError(
+                "S3 dependencies are not installed. Please install with 'pip install cognee[aws]' to use S3 functionality."
+ ) + normalized_url = get_data_file_path(file_path) s3_dir_path = os.path.dirname(normalized_url) s3_filename = os.path.basename(normalized_url) - # if "/" in s3_path: - # s3_dir = "/".join(s3_path.split("/")[:-1]) - # s3_filename = s3_path.split("/")[-1] - # else: - # s3_dir = "" - # s3_filename = s3_path - - # Extract filesystem path from S3 URL structure - # file_dir_path = ( - # f"s3://{parsed_url.netloc}/{s3_dir}" if s3_dir else f"s3://{parsed_url.netloc}" - # ) - # file_name = s3_filename - file_storage = S3FileStorage(s3_dir_path) async with file_storage.open(s3_filename, mode=mode, **kwargs) as file: From 64d6d6ede26e28e19e69696a57600f00e4ee8a93 Mon Sep 17 00:00:00 2001 From: vasilije Date: Wed, 27 Aug 2025 18:18:17 +0200 Subject: [PATCH 05/25] added new optionals --- cognee/api/client.py | 18 ++++++++++++------ cognee/modules/observability/get_observe.py | 13 +++++++++++-- .../cognee_network_visualization.py | 8 +++++++- cognee/shared/utils.py | 1 - pyproject.toml | 8 ++------ 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/cognee/api/client.py b/cognee/api/client.py index 215e4a17e..d72614e14 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -3,7 +3,6 @@ import os import uvicorn -import sentry_sdk from traceback import format_exc from contextlib import asynccontextmanager from fastapi import Request @@ -37,11 +36,18 @@ from cognee.api.v1.users.routers import ( logger = get_logger() if os.getenv("ENV", "prod") == "prod": - sentry_sdk.init( - dsn=os.getenv("SENTRY_REPORTING_URL"), - traces_sample_rate=1.0, - profiles_sample_rate=1.0, - ) + try: + import sentry_sdk + + sentry_sdk.init( + dsn=os.getenv("SENTRY_REPORTING_URL"), + traces_sample_rate=1.0, + profiles_sample_rate=1.0, + ) + except ImportError: + logger.info( + "Sentry SDK not available. Install with 'pip install cognee[monitoring]' to enable error monitoring." + ) app_environment = os.getenv("ENV", "prod") diff --git a/cognee/modules/observability/get_observe.py b/cognee/modules/observability/get_observe.py index db3655482..cbd55f072 100644 --- a/cognee/modules/observability/get_observe.py +++ b/cognee/modules/observability/get_observe.py @@ -6,6 +6,15 @@ def get_observe(): monitoring = get_base_config().monitoring_tool if monitoring == Observer.LANGFUSE: - from langfuse.decorators import observe + try: + from langfuse.decorators import observe - return observe + return observe + except ImportError: + # Return a no-op decorator if Langfuse is not available + def noop_observe(func=None, **kwargs): + if func is None: + return lambda f: f + return func + + return noop_observe diff --git a/cognee/modules/visualization/cognee_network_visualization.py b/cognee/modules/visualization/cognee_network_visualization.py index dde2fe98d..ef9b2f126 100644 --- a/cognee/modules/visualization/cognee_network_visualization.py +++ b/cognee/modules/visualization/cognee_network_visualization.py @@ -1,6 +1,5 @@ import os import json -import networkx from cognee.shared.logging_utils import get_logger from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage @@ -9,6 +8,13 @@ logger = get_logger() async def cognee_network_visualization(graph_data, destination_file_path: str = None): + try: + import networkx + except ImportError: + raise ImportError( + "NetworkX is not installed. Please install with 'pip install cognee[visualization]' to use graph visualization features." 
+ ) + nodes_data, edges_data = graph_data G = networkx.DiGraph() diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index fb4193a8c..22557d2ac 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -3,7 +3,6 @@ import os import requests from datetime import datetime, timezone -import matplotlib.pyplot as plt import http.server import socketserver from threading import Thread diff --git a/pyproject.toml b/pyproject.toml index 0d076e2e5..5cb378a0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,15 +34,12 @@ dependencies = [ "tiktoken>=0.8.0,<1.0.0", "litellm>=1.71.0, <2.0.0", "instructor>=1.9.1,<2.0.0", - "langfuse>=2.32.0,<3", "filetype>=1.2.0,<2.0.0", "aiohttp>=3.11.14,<4.0.0", "aiofiles>=23.2.1,<24.0.0", "rdflib>=7.1.4,<7.2.0", "pypdf>=4.1.0,<6.0.0", "jinja2>=3.1.3,<4", - "matplotlib>=3.8.3,<4", - "networkx>=3.4.2,<4", "lancedb>=0.24.0,<1.0.0", "alembic>=1.13.3,<2", "pre-commit>=4.0.1,<5", @@ -51,10 +48,7 @@ dependencies = [ "fastapi>=0.115.7,<1.0.0", "python-multipart>=0.0.20,<1.0.0", "fastapi-users[sqlalchemy]>=14.0.1,<15.0.0", - - "sentry-sdk[fastapi]>=2.9.0,<3", "structlog>=25.2.0,<26", - "pympler>=1.1,<2.0.0", "onnxruntime>=1.0.0,<2.0.0", "pylance>=0.22.0,<1.0.0", @@ -141,6 +135,8 @@ dev = [ "mkdocstrings[python]>=0.26.2,<0.27", ] debug = ["debugpy>=1.8.9,<2.0.0"] +visualization = ["networkx>=3.4.2,<4", "matplotlib>=3.8.3,<4"] +monitoring = ["sentry-sdk[fastapi]>=2.9.0,<3", "langfuse>=2.32.0,<3"] [project.urls] Homepage = "https://www.cognee.ai" From 38bbfd42cf66c16708a87e95eff047583c2c210e Mon Sep 17 00:00:00 2001 From: vasilije Date: Wed, 27 Aug 2025 19:14:16 +0200 Subject: [PATCH 06/25] added lancedb pandas removal --- cognee/eval_framework/modal_eval_dashboard.py | 14 +- .../vector/lancedb/LanceDBAdapter.py | 13 +- cognee/tasks/graph/infer_data_ontology.py | 309 ------------------ pyproject.toml | 3 +- 4 files changed, 21 insertions(+), 318 deletions(-) delete mode 100644 cognee/tasks/graph/infer_data_ontology.py diff --git a/cognee/eval_framework/modal_eval_dashboard.py b/cognee/eval_framework/modal_eval_dashboard.py index acc0c3aa9..9ff6f543c 100644 --- a/cognee/eval_framework/modal_eval_dashboard.py +++ b/cognee/eval_framework/modal_eval_dashboard.py @@ -1,6 +1,10 @@ import os import json -import pandas as pd + +try: + import pandas as pd +except ImportError: + pd = None import subprocess import modal import streamlit as st @@ -12,7 +16,7 @@ metrics_volume = modal.Volume.from_name("evaluation_dashboard_results", create_i image = ( modal.Image.debian_slim(python_version="3.11") - .pip_install("streamlit", "pandas", "plotly") + .pip_install("streamlit", "plotly") .add_local_file(__file__, "/root/serve_dashboard.py") ) @@ -78,6 +82,12 @@ def main(): } ) + if pd is None: + st.error( + "Pandas is required for the evaluation dashboard. Install with 'pip install cognee[evals]' to use this feature." 
+ ) + return + df = pd.DataFrame(records) if df.empty: st.warning("No JSON files found in the volume.") diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py index 0184ec3ee..7bc3385b9 100644 --- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py @@ -205,9 +205,12 @@ class LanceDBAdapter(VectorDBInterface): collection = await self.get_collection(collection_name) if len(data_point_ids) == 1: - results = await collection.query().where(f"id = '{data_point_ids[0]}'").to_pandas() + results = await collection.query().where(f"id = '{data_point_ids[0]}'") else: - results = await collection.query().where(f"id IN {tuple(data_point_ids)}").to_pandas() + results = await collection.query().where(f"id IN {tuple(data_point_ids)}") + + # Convert query results to list format + results_list = results.to_list() if hasattr(results, "to_list") else list(results) return [ ScoredResult( @@ -215,7 +218,7 @@ class LanceDBAdapter(VectorDBInterface): payload=result["payload"], score=0, ) - for result in results.to_dict("index").values() + for result in results_list ] async def search( @@ -242,9 +245,9 @@ class LanceDBAdapter(VectorDBInterface): if limit == 0: return [] - results = await collection.vector_search(query_vector).limit(limit).to_pandas() + result_values = await collection.vector_search(query_vector).limit(limit).to_list() - result_values = list(results.to_dict("index").values()) + # result_values = list(results.to_dict("index").values()) if not result_values: return [] diff --git a/cognee/tasks/graph/infer_data_ontology.py b/cognee/tasks/graph/infer_data_ontology.py deleted file mode 100644 index 93b02db9f..000000000 --- a/cognee/tasks/graph/infer_data_ontology.py +++ /dev/null @@ -1,309 +0,0 @@ -# PROPOSED TO BE DEPRECATED - -"""This module contains the OntologyEngine class which is responsible for adding graph ontology from a JSON or CSV file.""" - -import csv -import json -from cognee.shared.logging_utils import get_logger -from datetime import datetime, timezone -from fastapi import status -from typing import Any, Dict, List, Optional, Union, Type - -import aiofiles -import pandas as pd -from pydantic import BaseModel - -from cognee.modules.graph.exceptions import EntityNotFoundError -from cognee.modules.ingestion.exceptions import IngestionError - -from cognee.infrastructure.data.chunking.config import get_chunk_config -from cognee.infrastructure.data.chunking.get_chunking_engine import get_chunk_engine -from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine -from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file -from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException -from cognee.modules.data.methods.add_model_class_to_graph import ( - add_model_class_to_graph, -) -from cognee.tasks.graph.models import NodeModel, GraphOntology -from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.engine.utils import generate_node_id, generate_node_name -from cognee.infrastructure.llm.LLMGateway import LLMGateway - -logger = get_logger("task:infer_data_ontology") - - -async def extract_ontology(content: str, response_model: Type[BaseModel]): - """ - Extracts structured ontology from the provided content using a pre-defined LLM client. 
- - This asynchronous function retrieves a system prompt from a file and utilizes an LLM - client to create a structured output based on the input content and specified response - model. - - Parameters: - ----------- - - - content (str): The content from which to extract the ontology. - - response_model (Type[BaseModel]): The model that defines the structure of the - output ontology. - - Returns: - -------- - - The structured ontology extracted from the content. - """ - - system_prompt = LLMGateway.read_query_prompt("extract_ontology.txt") - - ontology = await LLMGateway.acreate_structured_output(content, system_prompt, response_model) - - return ontology - - -class OntologyEngine: - """ - Manage ontology data and operations for graph structures, providing methods for data - loading, flattening models, and adding ontological relationships to a graph database. - - Public methods: - - - flatten_model - - recursive_flatten - - load_data - - add_graph_ontology - """ - - async def flatten_model( - self, model: NodeModel, parent_id: Optional[str] = None - ) -> Dict[str, Any]: - """ - Flatten the model to a dictionary including optional parent ID and relationship details - if available. - - Parameters: - ----------- - - - model (NodeModel): The NodeModel instance to flatten. - - parent_id (Optional[str]): An optional ID of the parent node for hierarchical - purposes. (default None) - - Returns: - -------- - - - Dict[str, Any]: A dictionary representation of the model with flattened - attributes. - """ - result = model.dict() - result["parent_id"] = parent_id - if model.default_relationship: - result.update( - { - "relationship_type": model.default_relationship.type, - "relationship_source": model.default_relationship.source, - "relationship_target": model.default_relationship.target, - } - ) - return result - - async def recursive_flatten( - self, items: Union[List[Dict[str, Any]], Dict[str, Any]], parent_id: Optional[str] = None - ) -> List[Dict[str, Any]]: - """ - Recursively flatten a hierarchical structure of models into a flat list of dictionaries. - - Parameters: - ----------- - - - items (Union[List[Dict[str, Any]], Dict[str, Any]]): A list or dictionary - containing models to flatten. - - parent_id (Optional[str]): An optional ID of the parent node to maintain hierarchy - during flattening. (default None) - - Returns: - -------- - - - List[Dict[str, Any]]: A flat list of dictionaries representing the hierarchical - model structure. - """ - flat_list = [] - - if isinstance(items, list): - for item in items: - flat_list.extend(await self.recursive_flatten(item, parent_id)) - elif isinstance(items, dict): - model = NodeModel.model_validate(items) - flat_list.append(await self.flatten_model(model, parent_id)) - for child in model.children: - flat_list.extend(await self.recursive_flatten(child, model.node_id)) - return flat_list - - async def load_data(self, file_path: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]: - """ - Load data from a specified JSON or CSV file and return it in a structured format. - - Parameters: - ----------- - - - file_path (str): The path to the file to load data from. - - Returns: - -------- - - - Union[List[Dict[str, Any]], Dict[str, Any]]: Parsed data from the file as either a - list of dictionaries or a single dictionary depending on content type. 
- """ - try: - if file_path.endswith(".json"): - async with aiofiles.open(file_path, mode="r") as f: - data = await f.read() - return json.loads(data) - elif file_path.endswith(".csv"): - async with aiofiles.open(file_path, mode="r") as f: - content = await f.read() - reader = csv.DictReader(content.splitlines()) - return list(reader) - else: - raise IngestionError(message="Unsupported file format") - except Exception as e: - raise IngestionError( - message=f"Failed to load data from {file_path}: {e}", - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - ) - - async def add_graph_ontology(self, file_path: str = None, documents: list = None): - """ - Add graph ontology from a JSON or CSV file, or infer relationships from provided - document content. Raise exceptions for invalid file types or missing entities. - - Parameters: - ----------- - - - file_path (str): Optional path to a file containing data to be loaded. (default - None) - - documents (list): Optional list of document objects for content extraction if no - file path is provided. (default None) - """ - if file_path is None: - initial_chunks_and_ids = [] - - chunk_config = get_chunk_config() - chunk_engine = get_chunk_engine() - chunk_strategy = chunk_config.chunk_strategy - - for base_file in documents: - with open(base_file.raw_data_location, "rb") as file: - try: - file_type = guess_file_type(file) - text = extract_text_from_file(file, file_type) - - subchunks, chunks_with_ids = chunk_engine.chunk_data( - chunk_strategy, - text, - chunk_config.chunk_size, - chunk_config.chunk_overlap, - ) - - if chunks_with_ids[0][0] == 1: - initial_chunks_and_ids.append({base_file.id: chunks_with_ids}) - - except FileTypeException: - logger.warning( - "File (%s) has an unknown file type. We are skipping it.", file["id"] - ) - - ontology = await extract_ontology(str(initial_chunks_and_ids), GraphOntology) - graph_client = await get_graph_engine() - - await graph_client.add_nodes( - [ - ( - node.id, - dict( - uuid=generate_node_id(node.id), - name=generate_node_name(node.name), - type=generate_node_id(node.id), - description=node.description, - updated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), - ), - ) - for node in ontology.nodes - ] - ) - - await graph_client.add_edges( - ( - generate_node_id(edge.source_id), - generate_node_id(edge.target_id), - edge.relationship_type, - dict( - source_node_id=generate_node_id(edge.source_id), - target_node_id=generate_node_id(edge.target_id), - relationship_name=edge.relationship_type, - updated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), - ), - ) - for edge in ontology.edges - ) - - else: - dataset_level_information = documents[0][1] - - # Extract the list of valid IDs from the explanations - valid_ids = {item["id"] for item in dataset_level_information} - try: - data = await self.load_data(file_path) - flt_ontology = await self.recursive_flatten(data) - df = pd.DataFrame(flt_ontology) - graph_client = await get_graph_engine() - - for _, row in df.iterrows(): - node_data = row.to_dict() - node_id = node_data.pop("node_id", None) - if node_id in valid_ids: - await graph_client.add_node(node_id, node_data) - if node_id not in valid_ids: - raise EntityNotFoundError( - message=f"Node ID {node_id} not found in the dataset" - ) - if pd.notna(row.get("relationship_source")) and pd.notna( - row.get("relationship_target") - ): - await graph_client.add_edge( - row["relationship_source"], - row["relationship_target"], - relationship_name=row["relationship_type"], - edge_properties={ 
- "source_node_id": row["relationship_source"], - "target_node_id": row["relationship_target"], - "relationship_name": row["relationship_type"], - "updated_at": datetime.now(timezone.utc).strftime( - "%Y-%m-%d %H:%M:%S" - ), - }, - ) - - return - except Exception as e: - raise RuntimeError(f"Failed to add graph ontology from {file_path}: {e}") from e - - -async def infer_data_ontology(documents, ontology_model=KnowledgeGraph, root_node_id=None): - """ - Infer data ontology from provided documents and optionally add it to a graph. - - Parameters: - ----------- - - - documents: The documents from which to infer the ontology. - - ontology_model: The ontology model to use for the inference, defaults to - KnowledgeGraph. (default KnowledgeGraph) - - root_node_id: An optional root node identifier for the ontology. (default None) - """ - if ontology_model == KnowledgeGraph: - ontology_engine = OntologyEngine() - root_node_id = await ontology_engine.add_graph_ontology(documents=documents) - else: - graph_engine = await get_graph_engine() - await add_model_class_to_graph(ontology_model, graph_engine) - - yield (documents, root_node_id) diff --git a/pyproject.toml b/pyproject.toml index 5cb378a0c..b17b4893a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,8 +27,6 @@ dependencies = [ "typing_extensions>=4.12.2,<5.0.0", "nltk>=3.9.1,<4.0.0", "numpy>=1.26.4, <=4.0.0", - "pandas>=2.2.2,<3.0.0", - "sqlalchemy>=2.0.39,<3.0.0", "aiosqlite>=0.20.0,<1.0.0", "tiktoken>=0.8.0,<1.0.0", @@ -110,6 +108,7 @@ codegraph = [ evals = [ "plotly>=6.0.0,<7", "gdown>=5.2.0,<6", + "pandas>=2.2.2,<3.0.0", ] graphiti = ["graphiti-core>=0.7.0,<0.8"] From c85302f467be683e2edc52d34439063859ae1d84 Mon Sep 17 00:00:00 2001 From: vasilije Date: Wed, 27 Aug 2025 19:42:30 +0200 Subject: [PATCH 07/25] added lancedb pandas removal --- poetry.lock | 261 ++++++++++++++++++++++------------------------------ 1 file changed, 109 insertions(+), 152 deletions(-) diff --git a/poetry.lock b/poetry.lock index 109e5d917..5ef1c5783 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,12 +1,13 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. 
[[package]] name = "aiobotocore" version = "2.24.1" description = "Async client for aws services using botocore and aiohttp" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "aiobotocore-2.24.1-py3-none-any.whl", hash = "sha256:557922823455ca65bbd065b363b54846f16b9c4b6bd0b61ecdfa01ca13a04531"}, {file = "aiobotocore-2.24.1.tar.gz", hash = "sha256:59237f1b2d4ff619f9a9e78360b691d59b92fdd4d03d054dbd2eeff8ada5667e"}, @@ -164,9 +165,10 @@ speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (> name = "aioitertools" version = "0.12.0" description = "itertools and builtins for AsyncIO and mixed iterables" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "aioitertools-0.12.0-py3-none-any.whl", hash = "sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796"}, {file = "aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b"}, @@ -455,7 +457,7 @@ description = "An asyncio PostgreSQL driver" optional = true python-versions = ">=3.8.0" groups = ["main"] -markers = "extra == \"postgres\" or extra == \"postgres-binary\"" +markers = "extra == \"postgres\" or extra == \"postgres-source\"" files = [ {file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"}, {file = "asyncpg-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc1f62c792752a49f88b7e6f774c26077091b44caceb1983509edc18a2222ec0"}, @@ -556,9 +558,10 @@ dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)" name = "backoff" version = "2.2.1" description = "Function decoration for backoff and retry" -optional = false +optional = true python-versions = ">=3.7,<4.0" groups = ["main"] +markers = "extra == \"deepeval\" or extra == \"posthog\" or extra == \"chromadb\" or extra == \"docs\" or extra == \"monitoring\"" files = [ {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, @@ -589,9 +592,10 @@ extras = ["regex"] name = "baml-py" version = "0.201.0" description = "BAML python bindings (pyproject.toml)" -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"baml\"" files = [ {file = "baml_py-0.201.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:83228d2af2b0e845bbbb4e14f7cbd3376cec385aee01210ac522ab6076e07bec"}, {file = "baml_py-0.201.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:2a9d016139e3ae5b5ce98c7b05b5fbd53d5d38f04dc810ec4d70fb17dd6c10e4"}, @@ -739,9 +743,10 @@ css = ["tinycss2 (>=1.1.0,<1.5)"] name = "boto3" version = "1.39.11" description = "The AWS SDK for Python" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"neptune\" or extra == \"aws\"" files = [ {file = "boto3-1.39.11-py3-none-any.whl", hash = "sha256:af8f1dad35eceff7658fab43b39b0f55892b6e3dd12308733521cc24dd2c9a02"}, {file = "boto3-1.39.11.tar.gz", hash = "sha256:3027edf20642fe1d5f9dc50a420d0fe2733073ed6a9f0f047b60fe08c3682132"}, @@ -759,9 +764,10 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] name = "botocore" version = "1.39.11" description = "Low-level, data-driven core of boto 3." 
-optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"neptune\" or extra == \"aws\"" files = [ {file = "botocore-1.39.11-py3-none-any.whl", hash = "sha256:1545352931a8a186f3e977b1e1a4542d7d434796e274c3c62efd0210b5ea76dc"}, {file = "botocore-1.39.11.tar.gz", hash = "sha256:953b12909d6799350e346ab038e55b6efe622c616f80aef74d7a6683ffdd972c"}, @@ -1176,10 +1182,10 @@ test = ["pytest"] name = "contourpy" version = "1.3.2" description = "Python library for calculating contours of 2D quadrilateral grids" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version == \"3.10\" and extra == \"visualization\"" files = [ {file = "contourpy-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba38e3f9f330af820c4b27ceb4b9c7feee5fe0493ea53a8720f4792667465934"}, {file = "contourpy-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc41ba0714aa2968d1f8674ec97504a8f7e334f48eeacebcaa6256213acb0989"}, @@ -1254,10 +1260,10 @@ test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist" name = "contourpy" version = "1.3.3" description = "Python library for calculating contours of 2D quadrilateral grids" -optional = false +optional = true python-versions = ">=3.11" groups = ["main"] -markers = "python_version >= \"3.11\"" +markers = "python_version >= \"3.11\" and extra == \"visualization\"" files = [ {file = "contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1"}, {file = "contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381"}, @@ -1524,9 +1530,10 @@ files = [ name = "cycler" version = "0.12.1" description = "Composable style cycles" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"visualization\"" files = [ {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, @@ -1815,9 +1822,10 @@ files = [ name = "dlt" version = "1.15.0" description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." 
-optional = false +optional = true python-versions = "<3.15,>=3.9.2" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "dlt-1.15.0-py3-none-any.whl", hash = "sha256:fdc1e8a47b6daae9d7f235de1146427a40518960f46089c3ae2b3c7ce5f66cd9"}, {file = "dlt-1.15.0.tar.gz", hash = "sha256:3dff1419649c984c183ba2ae53bfa60f4d0d7cf3590c1388997886dbe7bfee97"}, @@ -2236,9 +2244,10 @@ files = [ name = "fonttools" version = "4.59.1" description = "Tools to manipulate font files" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"visualization\"" files = [ {file = "fonttools-4.59.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e90a89e52deb56b928e761bb5b5f65f13f669bfd96ed5962975debea09776a23"}, {file = "fonttools-4.59.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d29ab70658d2ec19422b25e6ace00a0b0ae4181ee31e03335eaef53907d2d83"}, @@ -2525,9 +2534,10 @@ dev = ["flake8", "markdown", "twine", "wheel"] name = "gitdb" version = "4.0.12" description = "Git Object Database" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"dlt\" or extra == \"dev\"" files = [ {file = "gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf"}, {file = "gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571"}, @@ -2540,9 +2550,10 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.45" description = "GitPython is a Python library used to interact with Git repositories" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"dlt\" or extra == \"dev\"" files = [ {file = "gitpython-3.1.45-py3-none-any.whl", hash = "sha256:8908cb2e02fb3b93b7eb0f2827125cb699869470432cc885f019b8fd0fccff77"}, {file = "gitpython-3.1.45.tar.gz", hash = "sha256:85b0ee964ceddf211c41b9f27a49086010a190fd8132a24e21f362a4b36a791c"}, @@ -2559,9 +2570,10 @@ test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock ; python_version < \"3. 
name = "giturlparse" version = "0.12.0" description = "A Git URL parsing module (supports parsing and rewriting)" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "giturlparse-0.12.0-py2.py3-none-any.whl", hash = "sha256:412b74f2855f1da2fefa89fd8dde62df48476077a72fc19b62039554d27360eb"}, {file = "giturlparse-0.12.0.tar.gz", hash = "sha256:c0fff7c21acc435491b1779566e038757a205c1ffdcb47e4f81ea52ad8c3859a"}, @@ -3051,9 +3063,10 @@ hyperframe = ">=6.1,<7" name = "hexbytes" version = "1.3.1" description = "hexbytes: Python `bytes` subclass that decodes hex, with a readable console output" -optional = false +optional = true python-versions = "<4,>=3.8" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "hexbytes-1.3.1-py3-none-any.whl", hash = "sha256:da01ff24a1a9a2b1881c4b85f0e9f9b0f51b526b379ffa23832ae7899d29c2c7"}, {file = "hexbytes-1.3.1.tar.gz", hash = "sha256:a657eebebdfe27254336f98d8af6e2236f3f83aed164b87466b6cf6c5f5a4765"}, @@ -3327,9 +3340,10 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve name = "humanize" version = "4.13.0" description = "Python humanize utilities" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "humanize-4.13.0-py3-none-any.whl", hash = "sha256:b810820b31891813b1673e8fec7f1ed3312061eab2f26e3fa192c393d11ed25f"}, {file = "humanize-4.13.0.tar.gz", hash = "sha256:78f79e68f76f0b04d711c4e55d32bebef5be387148862cb1ef83d2b58e7935a0"}, @@ -3769,9 +3783,10 @@ files = [ name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"neptune\" or extra == \"aws\"" files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -3837,11 +3852,14 @@ jsonpointer = ">=1.9" name = "jsonpath-ng" version = "1.7.0" description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." 
-optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, + {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, + {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -4148,9 +4166,10 @@ test = ["hatch", "ipykernel", "openapi-core (>=0.18.0,<0.19.0)", "openapi-spec-v name = "kiwisolver" version = "1.4.9" description = "A fast implementation of the Cassowary constraint solver" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] +markers = "extra == \"visualization\"" files = [ {file = "kiwisolver-1.4.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b4b4d74bda2b8ebf4da5bd42af11d02d04428b2c32846e4c2c93219df8a7987b"}, {file = "kiwisolver-1.4.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fb3b8132019ea572f4611d770991000d7f58127560c4889729248eb5852a102f"}, @@ -4421,9 +4440,10 @@ six = "*" name = "langfuse" version = "2.60.9" description = "A client library for accessing langfuse" -optional = false +optional = true python-versions = "<4.0,>=3.9" groups = ["main"] +markers = "extra == \"monitoring\"" files = [ {file = "langfuse-2.60.9-py3-none-any.whl", hash = "sha256:e4291a66bc579c66d7652da5603ca7f0409536700d7b812e396780b5d9a0685d"}, {file = "langfuse-2.60.9.tar.gz", hash = "sha256:040753346d7df4a0be6967dfc7efe3de313fee362524fe2f801867fcbbca3c98"}, @@ -5015,9 +5035,10 @@ tests = ["pytest", "simplejson"] name = "matplotlib" version = "3.10.5" description = "Python plotting package" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] +markers = "extra == \"visualization\"" files = [ {file = "matplotlib-3.10.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:5d4773a6d1c106ca05cb5a5515d277a6bb96ed09e5c8fab6b7741b8fcaa62c8f"}, {file = "matplotlib-3.10.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc88af74e7ba27de6cbe6faee916024ea35d895ed3d61ef6f58c4ce97da7185a"}, @@ -5935,10 +5956,10 @@ files = [ name = "networkx" version = "3.4.2" description = "Python package for creating and manipulating graphs and networks" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version == \"3.10\" and (extra == \"llama-index\" or extra == \"docs\" or extra == \"visualization\")" files = [ {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, @@ -5956,10 +5977,10 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "networkx" version = "3.5" description = "Python package for creating and manipulating graphs and networks" -optional = false +optional = true python-versions = ">=3.11" groups = ["main"] -markers = "python_version >= \"3.11\"" +markers = "python_version >= \"3.11\" and (extra == \"llama-index\" or extra == \"docs\" or extra == \"visualization\")" files = [ {file = "networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec"}, {file = "networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037"}, @@ 
-6452,10 +6473,10 @@ test = ["pytest (>=8.3.0,<8.4.0)", "pytest-benchmark (>=5.1.0,<5.2.0)", "pytest- name = "orjson" version = "3.11.3" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] -markers = "(sys_platform != \"emscripten\" or extra == \"neptune\" or extra == \"langchain\") and (sys_platform != \"emscripten\" or platform_python_implementation != \"PyPy\")" +markers = "(sys_platform != \"emscripten\" or platform_python_implementation != \"PyPy\") and (sys_platform != \"emscripten\" or extra == \"neptune\" or extra == \"langchain\") and (platform_python_implementation != \"PyPy\" or extra == \"dlt\") and (extra == \"neptune\" or extra == \"langchain\" or extra == \"dlt\")" files = [ {file = "orjson-3.11.3-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:29cb1f1b008d936803e2da3d7cba726fc47232c45df531b29edf0b232dd737e7"}, {file = "orjson-3.11.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97dceed87ed9139884a55db8722428e27bd8452817fbf1869c58b49fecab1120"}, @@ -6587,9 +6608,10 @@ lint = ["black"] name = "pandas" version = "2.3.2" description = "Powerful data structures for data analysis, time series, and statistics" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"chromadb\" or extra == \"docs\" or extra == \"evals\"" files = [ {file = "pandas-2.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52bc29a946304c360561974c6542d1dd628ddafa69134a7131fdfd6a5d7a1a35"}, {file = "pandas-2.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:220cc5c35ffaa764dd5bb17cf42df283b5cb7fdf49e10a7b053a06c9cb48ee2b"}, @@ -6717,9 +6739,10 @@ files = [ name = "pathvalidate" version = "3.3.1" description = "pathvalidate is a Python library to sanitize/validate a string such as filenames/file-paths/etc." 
-optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f"}, {file = "pathvalidate-3.3.1.tar.gz", hash = "sha256:b18c07212bfead624345bb8e1d6141cdcf15a39736994ea0b94035ad2b1ba177"}, @@ -6734,9 +6757,10 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1 name = "pendulum" version = "3.1.0" description = "Python datetimes made easy" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "pendulum-3.1.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:aa545a59e6517cf43597455a6fb44daa4a6e08473d67a7ad34e4fa951efb9620"}, {file = "pendulum-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:299df2da6c490ede86bb8d58c65e33d7a2a42479d21475a54b467b03ccb88531"}, @@ -6834,7 +6858,7 @@ description = "pgvector support for Python" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"postgres\" or extra == \"postgres-binary\"" +markers = "extra == \"postgres\" or extra == \"postgres-source\"" files = [ {file = "pgvector-0.3.6-py3-none-any.whl", hash = "sha256:f6c269b3c110ccb7496bac87202148ed18f34b390a0189c783e351062400a75a"}, {file = "pgvector-0.3.6.tar.gz", hash = "sha256:31d01690e6ea26cea8a633cde5f0f55f5b246d9c8292d68efdef8c22ec994ade"}, @@ -6847,9 +6871,10 @@ numpy = "*" name = "pillow" version = "11.3.0" description = "Python Imaging Library (Fork)" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "(extra == \"codegraph\" or extra == \"llama-index\" or extra == \"mistral\" or extra == \"docs\" or extra == \"visualization\") and (python_version <= \"3.12\" or extra == \"llama-index\" or extra == \"mistral\" or extra == \"docs\" or extra == \"visualization\")" files = [ {file = "pillow-11.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b9c17fd4ace828b3003dfd1e30bff24863e0eb59b535e8f80194d9cc7ecf860"}, {file = "pillow-11.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:65dc69160114cdd0ca0f35cb434633c75e8e7fad4cf855177a05bf38678f73ad"}, @@ -7014,9 +7039,10 @@ kaleido = ["kaleido (>=1.0.0)"] name = "pluggy" version = "1.6.0" description = "plugin and hook calling mechanisms for python" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\"" files = [ {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, @@ -7030,9 +7056,10 @@ testing = ["coverage", "pytest", "pytest-benchmark"] name = "ply" version = "3.11" description = "Python Lex & Yacc" -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, @@ -7317,7 +7344,7 @@ description = "psycopg2 - Python-PostgreSQL Database Adapter" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"postgres\"" +markers = "extra == \"postgres-source\" and platform_system != \"Windows\"" files = [ {file = 
"psycopg2-2.9.10-cp310-cp310-win32.whl", hash = "sha256:5df2b672140f95adb453af93a7d669d7a7bf0a56bcd26f1502329166f4a61716"}, {file = "psycopg2-2.9.10-cp310-cp310-win_amd64.whl", hash = "sha256:c6f7b8561225f9e711a9c47087388a97fdc948211c10a4bccbf0ba68ab7b3b5a"}, @@ -7337,7 +7364,7 @@ description = "psycopg2 - Python-PostgreSQL Database Adapter" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"postgres-binary\"" +markers = "extra == \"postgres\"" files = [ {file = "psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2"}, {file = "psycopg2_binary-2.9.10-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:0ea8e3d0ae83564f2fc554955d327fa081d065c8ca5cc6d2abb643e2c9c1200f"}, @@ -7386,6 +7413,7 @@ files = [ {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, @@ -8097,66 +8125,6 @@ files = [ [package.extras] dev = ["build", "flake8", "mypy", "pytest", "twine"] -[[package]] -name = "pyside6" -version = "6.9.2" -description = "Python bindings for the Qt cross-platform application and UI framework" -optional = true -python-versions = "<3.14,>=3.9" -groups = ["main"] -markers = "extra == \"gui\"" -files = [ - {file = "pyside6-6.9.2-cp39-abi3-macosx_12_0_universal2.whl", hash = "sha256:71245c76bfbe5c41794ffd8546730ec7cc869d4bbe68535639e026e4ef8a7714"}, - {file = "pyside6-6.9.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:64a9e2146e207d858e00226f68d7c1b4ab332954742a00dcabb721bb9e4aa0cd"}, - {file = "pyside6-6.9.2-cp39-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:a78fad16241a1f2ed0fa0098cf3d621f591fc75b4badb7f3fa3959c9d861c806"}, - {file = "pyside6-6.9.2-cp39-abi3-win_amd64.whl", hash = "sha256:d1afbf48f9a5612b9ee2dc7c384c1a65c08b5830ba5e7d01f66d82678e5459df"}, - {file = "pyside6-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:1499b1d7629ab92119118e2636b4ace836b25e457ddf01003fdca560560b8c0a"}, -] - -[package.dependencies] -PySide6_Addons = "6.9.2" -PySide6_Essentials = "6.9.2" -shiboken6 = "6.9.2" - -[[package]] -name = "pyside6-addons" -version = "6.9.2" -description = "Python bindings for the Qt cross-platform application and UI framework (Addons)" -optional = true -python-versions = "<3.14,>=3.9" -groups = ["main"] -markers = "extra == \"gui\"" -files = [ - {file = "pyside6_addons-6.9.2-cp39-abi3-macosx_12_0_universal2.whl", hash = "sha256:7019fdcc0059626eb1608b361371f4dc8cb7f2d02f066908fd460739ff5a07cd"}, - {file = 
"pyside6_addons-6.9.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:24350e5415317f269e743d1f7b4933fe5f59d90894aa067676c9ce6bfe9e7988"}, - {file = "pyside6_addons-6.9.2-cp39-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:af8dee517de8d336735a6543f7dd496eb580e852c14b4d2304b890e2a29de499"}, - {file = "pyside6_addons-6.9.2-cp39-abi3-win_amd64.whl", hash = "sha256:98d2413904ee4b2b754b077af7875fa6ec08468c01a6628a2c9c3d2cece4874f"}, - {file = "pyside6_addons-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:b430cae782ff1a99fb95868043557f22c31b30c94afb9cf73278584e220a2ab6"}, -] - -[package.dependencies] -PySide6_Essentials = "6.9.2" -shiboken6 = "6.9.2" - -[[package]] -name = "pyside6-essentials" -version = "6.9.2" -description = "Python bindings for the Qt cross-platform application and UI framework (Essentials)" -optional = true -python-versions = "<3.14,>=3.9" -groups = ["main"] -markers = "extra == \"gui\"" -files = [ - {file = "pyside6_essentials-6.9.2-cp39-abi3-macosx_12_0_universal2.whl", hash = "sha256:713eb8dcbb016ff10e6fca129c1bf2a0fd8cfac979e689264e0be3b332f9398e"}, - {file = "pyside6_essentials-6.9.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:84b8ca4fa56506e2848bdb4c7a0851a5e7adcb916bef9bce25ce2eeb6c7002cc"}, - {file = "pyside6_essentials-6.9.2-cp39-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:d0f701503974bd51b408966539aa6956f3d8536e547ea8002fbfb3d77796bbc3"}, - {file = "pyside6_essentials-6.9.2-cp39-abi3-win_amd64.whl", hash = "sha256:b2f746f795138ac63eb173f9850a6db293461a1b6ce22cf6dafac7d194a38951"}, - {file = "pyside6_essentials-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:ecd7b5cd9e271f397fb89a6357f4ec301d8163e50869c6c557f9ccc6bed42789"}, -] - -[package.dependencies] -shiboken6 = "6.9.2" - [[package]] name = "pysocks" version = "1.7.1" @@ -8295,9 +8263,10 @@ testing = ["filelock"] name = "python-dateutil" version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" -optional = false +optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" groups = ["main"] +markers = "extra == \"neptune\" or extra == \"aws\" or extra == \"notebook\" or extra == \"dev\" or extra == \"deepeval\" or extra == \"posthog\" or extra == \"chromadb\" or extra == \"falkordb\" or extra == \"docs\" or extra == \"evals\" or extra == \"dlt\" or extra == \"visualization\"" files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -8450,9 +8419,10 @@ XlsxWriter = ">=0.5.7" name = "pytz" version = "2025.2" description = "World timezone definitions, modern and historical" -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"neo4j\" or extra == \"graphiti\" or extra == \"chromadb\" or extra == \"docs\" or extra == \"evals\" or extra == \"dlt\"" files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -8465,7 +8435,7 @@ description = "Python for Window Extensions" optional = false python-versions = "*" groups = ["main"] -markers = "sys_platform == \"win32\" or platform_system == \"Windows\"" +markers = "(platform_system == \"Windows\" or platform_python_implementation != \"PyPy\" or extra == \"dlt\") 
and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"dlt\") and (platform_system == \"Windows\" or sys_platform == \"win32\")" files = [ {file = "pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3"}, {file = "pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b"}, @@ -8692,19 +8662,6 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} -[[package]] -name = "qasync" -version = "0.27.1" -description = "Python library for using asyncio in Qt-based applications" -optional = true -python-versions = ">=3.8,<4.0" -groups = ["main"] -markers = "extra == \"gui\"" -files = [ - {file = "qasync-0.27.1-py3-none-any.whl", hash = "sha256:5d57335723bc7d9b328dadd8cb2ed7978640e4bf2da184889ce50ee3ad2602c7"}, - {file = "qasync-0.27.1.tar.gz", hash = "sha256:8dc768fd1ee5de1044c7c305eccf2d39d24d87803ea71189d4024fb475f4985f"}, -] - [[package]] name = "rapidfuzz" version = "3.13.0" @@ -9034,9 +8991,10 @@ requests = ">=2.0.1,<3.0.0" name = "requirements-parser" version = "0.13.0" description = "This is a small Python module for parsing Pip requirement files." -optional = false +optional = true python-versions = "<4.0,>=3.8" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "requirements_parser-0.13.0-py3-none-any.whl", hash = "sha256:2b3173faecf19ec5501971b7222d38f04cb45bb9d87d0ad629ca71e2e62ded14"}, {file = "requirements_parser-0.13.0.tar.gz", hash = "sha256:0843119ca2cb2331de4eb31b10d70462e39ace698fd660a915c247d2301a4418"}, @@ -9116,9 +9074,10 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] name = "rich-argparse" version = "1.7.1" description = "Rich help formatters for argparse and optparse" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "rich_argparse-1.7.1-py3-none-any.whl", hash = "sha256:a8650b42e4a4ff72127837632fba6b7da40784842f08d7395eb67a9cbd7b4bf9"}, {file = "rich_argparse-1.7.1.tar.gz", hash = "sha256:d7a493cde94043e41ea68fb43a74405fa178de981bf7b800f7a3bd02ac5c27be"}, @@ -9342,9 +9301,10 @@ files = [ name = "s3fs" version = "2025.3.2" description = "Convenient Filesystem interface over S3" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "s3fs-2025.3.2-py3-none-any.whl", hash = "sha256:81eae3f37b4b04bcc08845d7bcc607c6ca45878813ef7e6a28d77b2688417130"}, {file = "s3fs-2025.3.2.tar.gz", hash = "sha256:6798f896ec76dd3bfd8beb89f0bb7c5263cb2760e038bae0978505cd172a307c"}, @@ -9366,9 +9326,10 @@ boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"] name = "s3transfer" version = "0.13.1" description = "An Amazon S3 Transfer Manager" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"neptune\" or extra == \"aws\"" files = [ {file = "s3transfer-0.13.1-py3-none-any.whl", hash = "sha256:a981aa7429be23fe6dfc13e80e4020057cbab622b08c0315288758d67cabc724"}, {file = "s3transfer-0.13.1.tar.gz", hash = "sha256:c3fdba22ba1bd367922f27ec8032d6a1cf5f10c934fb5d68cf60fd5a23d936cf"}, @@ -9614,9 +9575,10 @@ test = ["Cython", "array-api-strict (>=2.3.1)", "asv", "gmpy2", "hypothesis (>=6 name = "semver" version = "3.0.4" description = "Python helper for Semantic Versioning (https://semver.org)" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"dlt\"" 
files = [ {file = "semver-3.0.4-py3-none-any.whl", hash = "sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746"}, {file = "semver-3.0.4.tar.gz", hash = "sha256:afc7d8c584a5ed0a11033af086e8af226a9c0b206f313e0301f8dd7b6b589602"}, @@ -9644,9 +9606,10 @@ win32 = ["pywin32 ; sys_platform == \"win32\""] name = "sentry-sdk" version = "2.35.1" description = "Python client for Sentry (https://sentry.io)" -optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"deepeval\" or extra == \"monitoring\"" files = [ {file = "sentry_sdk-2.35.1-py2.py3-none-any.whl", hash = "sha256:13b6d6cfdae65d61fe1396a061cf9113b20f0ec1bcb257f3826b88f01bb55720"}, {file = "sentry_sdk-2.35.1.tar.gz", hash = "sha256:241b41e059632fe1f7c54ae6e1b93af9456aebdfc297be9cf7ecfd6da5167e8e"}, @@ -9702,9 +9665,10 @@ unleash = ["UnleashClient (>=6.0.1)"] name = "setuptools" version = "80.9.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dlt\"" files = [ {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, @@ -9731,22 +9695,6 @@ files = [ {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"}, ] -[[package]] -name = "shiboken6" -version = "6.9.2" -description = "Python/C++ bindings helper module" -optional = true -python-versions = "<3.14,>=3.9" -groups = ["main"] -markers = "extra == \"gui\"" -files = [ - {file = "shiboken6-6.9.2-cp39-abi3-macosx_12_0_universal2.whl", hash = "sha256:8bb1c4326330e53adeac98bfd9dcf57f5173a50318a180938dcc4825d9ca38da"}, - {file = "shiboken6-6.9.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3b54c0a12ea1b03b9dc5dcfb603c366e957dc75341bf7cb1cc436d0d848308ee"}, - {file = "shiboken6-6.9.2-cp39-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:a5f5985938f5acb604c23536a0ff2efb3cccb77d23da91fbaff8fd8ded3dceb4"}, - {file = "shiboken6-6.9.2-cp39-abi3-win_amd64.whl", hash = "sha256:68c33d565cd4732be762d19ff67dfc53763256bac413d392aa8598b524980bc4"}, - {file = "shiboken6-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:c5b827797b3d89d9b9a3753371ff533fcd4afc4531ca51a7c696952132098054"}, -] - [[package]] name = "sigtools" version = "4.0.1" @@ -9771,9 +9719,10 @@ tests = ["coverage", "mock", "repeated-test (>=2.2.1)", "sphinx"] name = "simplejson" version = "3.20.1" description = "Simple, fast, extensible JSON encoder/decoder for Python" -optional = false +optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.5" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "simplejson-3.20.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:f5272b5866b259fe6c33c4a8c5073bf8b359c3c97b70c298a2f09a69b52c7c41"}, {file = "simplejson-3.20.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5c0de368f3052a59a1acf21f8b2dd28686a9e4eba2da7efae7ed9554cb31e7bc"}, @@ -9891,9 +9840,10 @@ files = [ name = "six" version = "1.17.0" description = "Python 2 and 3 compatibility utilities" -optional = false +optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" groups = ["main"] +markers = "extra == \"neptune\" or extra == \"aws\" or extra == \"notebook\" or extra 
== \"dev\" or extra == \"deepeval\" or extra == \"posthog\" or extra == \"chromadb\" or extra == \"falkordb\" or extra == \"docs\" or extra == \"evals\" or extra == \"dlt\" or extra == \"visualization\"" files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -9903,9 +9853,10 @@ files = [ name = "smmap" version = "5.0.2" description = "A pure Python implementation of a sliding window memory map manager" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"dlt\" or extra == \"dev\"" files = [ {file = "smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e"}, {file = "smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5"}, @@ -10036,9 +9987,10 @@ sqlcipher = ["sqlcipher3_binary"] name = "sqlglot" version = "27.8.0" description = "An easily customizable SQL parser and transpiler" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"dlt\"" files = [ {file = "sqlglot-27.8.0-py3-none-any.whl", hash = "sha256:3961277277bc5bae459762294e160b6b7ce998e7d016f5adf8311a1d50b7a1a7"}, {file = "sqlglot-27.8.0.tar.gz", hash = "sha256:026ca21be0106d23f67519d583a24131d27131ceb80b595efa2a59a2746f351f"}, @@ -10366,9 +10318,10 @@ files = [ name = "tomlkit" version = "0.13.3" description = "Style preserving TOML library" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"dlt\" or extra == \"dev\"" files = [ {file = "tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0"}, {file = "tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1"}, @@ -10705,9 +10658,10 @@ typing-extensions = ">=4.12.0" name = "tzdata" version = "2025.2" description = "Provider of IANA time zone data" -optional = false +optional = true python-versions = ">=2" groups = ["main"] +markers = "extra == \"chromadb\" or extra == \"docs\" or extra == \"evals\" or extra == \"dlt\"" files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -11288,10 +11242,10 @@ test = ["pytest (>=6.0.0)", "setuptools (>=65)"] name = "win-precise-time" version = "1.4.2" description = "" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] -markers = "os_name == \"nt\" and python_version <= \"3.12\"" +markers = "extra == \"dlt\" and os_name == \"nt\" and python_version <= \"3.12\"" files = [ {file = "win-precise-time-1.4.2.tar.gz", hash = "sha256:89274785cbc5f2997e01675206da3203835a442c60fd97798415c6b3c179c0b9"}, {file = "win_precise_time-1.4.2-cp310-cp310-win32.whl", hash = "sha256:7fa13a2247c2ef41cd5e9b930f40716eacc7fc1f079ea72853bd5613fe087a1a"}, @@ -11702,32 +11656,35 @@ cffi = ["cffi (>=1.17) ; python_version >= \"3.13\" and platform_python_implemen anthropic = ["anthropic"] api = ["gunicorn", "uvicorn", "websockets"] aws = ["s3fs"] +baml = ["baml-py"] chromadb = ["chromadb", "pypika"] codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"] debug = ["debugpy"] deepeval = 
["deepeval"] dev = ["coverage", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"] distributed = ["modal"] +dlt = ["dlt"] docs = ["unstructured"] -evals = ["gdown", "plotly"] +evals = ["gdown", "pandas", "plotly"] falkordb = ["falkordb"] gemini = ["google-generativeai"] graphiti = ["graphiti-core"] groq = ["groq"] -gui = ["pyside6", "qasync"] huggingface = ["transformers"] langchain = ["langchain_text_splitters", "langsmith"] llama-index = ["llama-index-core"] mistral = ["mistral-common"] +monitoring = ["langfuse", "sentry-sdk"] neo4j = ["neo4j"] neptune = ["langchain_aws"] notebook = ["notebook"] ollama = ["transformers"] -postgres = ["asyncpg", "pgvector", "psycopg2"] -postgres-binary = ["asyncpg", "pgvector", "psycopg2-binary"] +postgres = ["asyncpg", "pgvector", "psycopg2-binary"] +postgres-source = ["asyncpg", "pgvector", "psycopg2"] posthog = ["posthog"] +visualization = ["matplotlib", "networkx"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "7743005314483d6cc76febb7970c8af9a3d2a63e76247505e33b20fdc974aca1" +content-hash = "5468b51a59af0c5530d4f8c5a0180d5161a034538f3a9707a46a82b4abde2754" From 00c7dfae492509549b1c26cbf530736e30b776c4 Mon Sep 17 00:00:00 2001 From: vasilije Date: Wed, 27 Aug 2025 19:43:34 +0200 Subject: [PATCH 08/25] added lancedb pandas removal --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b17b4893a..4c165a325 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,6 @@ dependencies = [ "fastapi-users[sqlalchemy]>=14.0.1,<15.0.0", "structlog>=25.2.0,<26", "pympler>=1.1,<2.0.0", - "onnxruntime>=1.0.0,<2.0.0", "pylance>=0.22.0,<1.0.0", "kuzu (==0.11.0)", "python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows From 76b8e16bcb1abc424cd699453c2e8832d9825376 Mon Sep 17 00:00:00 2001 From: vasilije Date: Wed, 27 Aug 2025 19:59:38 +0200 Subject: [PATCH 09/25] removed extra things --- .../data/utils/extract_keywords.py | 64 +++++++++---------- cognee/shared/utils.py | 45 +++++++------ pyproject.toml | 5 +- 3 files changed, 58 insertions(+), 56 deletions(-) diff --git a/cognee/infrastructure/data/utils/extract_keywords.py b/cognee/infrastructure/data/utils/extract_keywords.py index 8085459c9..811999618 100644 --- a/cognee/infrastructure/data/utils/extract_keywords.py +++ b/cognee/infrastructure/data/utils/extract_keywords.py @@ -1,48 +1,46 @@ -from sklearn.feature_extraction.text import TfidfVectorizer from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError -from cognee.shared.utils import extract_pos_tags -def extract_keywords(text: str) -> list[str]: - """ - Extract keywords from the provided text string. +# def extract_keywords(text: str) -> list[str]: +# """ +# Extract keywords from the provided text string. - This function raises an KeyWordExtractionError if the input text is empty. It processes the - text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most - relevant keywords based on their frequency. The function returns a list of up to 15 - keywords, each having more than 3 characters. +# This function raises an KeyWordExtractionError if the input text is empty. It processes the +# text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most +# relevant keywords based on their frequency. 
The function returns a list of up to 15 +# keywords, each having more than 3 characters. - Parameters: - ----------- +# Parameters: +# ----------- - - text (str): The input text from which to extract keywords. +# - text (str): The input text from which to extract keywords. - Returns: - -------- +# Returns: +# -------- - - list[str]: A list of keywords extracted from the text, containing up to 15 nouns - with more than 3 characters. - """ - if len(text) == 0: - raise KeywordExtractionError() +# - list[str]: A list of keywords extracted from the text, containing up to 15 nouns +# with more than 3 characters. +# """ +# if len(text) == 0: +# raise KeywordExtractionError() - tags = extract_pos_tags(text) - nouns = [word for (word, tag) in tags if tag == "NN"] +# tags = extract_pos_tags(text) +# nouns = [word for (word, tag) in tags if tag == "NN"] - vectorizer = TfidfVectorizer() - tfidf = vectorizer.fit_transform(nouns) +# vectorizer = TfidfVectorizer() +# tfidf = vectorizer.fit_transform(nouns) - top_nouns = sorted( - vectorizer.vocabulary_, key=lambda x: tfidf[0, vectorizer.vocabulary_[x]], reverse=True - ) +# top_nouns = sorted( +# vectorizer.vocabulary_, key=lambda x: tfidf[0, vectorizer.vocabulary_[x]], reverse=True +# ) - keywords = [] +# keywords = [] - for word in top_nouns: - if len(word) > 3: - keywords.append(word) - if len(keywords) >= 15: - break +# for word in top_nouns: +# if len(word) > 3: +# keywords.append(word) +# if len(keywords) >= 15: +# break - return keywords +# return keywords diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index 22557d2ac..df141f2de 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -18,34 +18,41 @@ proxy_url = "https://test.prometh.ai" def get_entities(tagged_tokens): - import nltk - - nltk.download("maxent_ne_chunker", quiet=True) - - from nltk.chunk import ne_chunk - - return ne_chunk(tagged_tokens) + try: + import nltk + nltk.download("maxent_ne_chunker", quiet=True) + from nltk.chunk import ne_chunk + return ne_chunk(tagged_tokens) + except ImportError: + raise ImportError( + "NLTK is required for entity extraction. Install with 'pip install cognee[nlp]' to use this feature." + ) def extract_pos_tags(sentence): """Extract Part-of-Speech (POS) tags for words in a sentence.""" - import nltk + try: + import nltk - # Ensure that the necessary NLTK resources are downloaded - nltk.download("words", quiet=True) - nltk.download("punkt", quiet=True) - nltk.download("averaged_perceptron_tagger", quiet=True) + # Ensure that the necessary NLTK resources are downloaded + nltk.download("words", quiet=True) + nltk.download("punkt", quiet=True) + nltk.download("averaged_perceptron_tagger", quiet=True) - from nltk.tag import pos_tag - from nltk.tokenize import word_tokenize + from nltk.tag import pos_tag + from nltk.tokenize import word_tokenize - # Tokenize the sentence into words - tokens = word_tokenize(sentence) + # Tokenize the sentence into words + tokens = word_tokenize(sentence) - # Tag each word with its corresponding POS tag - pos_tags = pos_tag(tokens) + # Tag each word with its corresponding POS tag + pos_tags = pos_tag(tokens) - return pos_tags + return pos_tags + except ImportError: + raise ImportError( + "NLTK is required for POS tagging. Install with 'pip install cognee[nlp]' to use this feature." 
+ ) def get_anonymous_id(): diff --git a/pyproject.toml b/pyproject.toml index 4c165a325..e71393b21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,6 @@ dependencies = [ "pydantic>=2.10.5,<3.0.0", "pydantic-settings>=2.2.1,<3", "typing_extensions>=4.12.2,<5.0.0", - "nltk>=3.9.1,<4.0.0", "numpy>=1.26.4, <=4.0.0", "sqlalchemy>=2.0.39,<3.0.0", "aiosqlite>=0.20.0,<1.0.0", @@ -40,14 +39,11 @@ dependencies = [ "jinja2>=3.1.3,<4", "lancedb>=0.24.0,<1.0.0", "alembic>=1.13.3,<2", - "pre-commit>=4.0.1,<5", - "scikit-learn>=1.6.1,<2", "limits>=4.4.1,<5", "fastapi>=0.115.7,<1.0.0", "python-multipart>=0.0.20,<1.0.0", "fastapi-users[sqlalchemy]>=14.0.1,<15.0.0", "structlog>=25.2.0,<26", - "pympler>=1.1,<2.0.0", "pylance>=0.22.0,<1.0.0", "kuzu (==0.11.0)", "python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows @@ -135,6 +131,7 @@ dev = [ debug = ["debugpy>=1.8.9,<2.0.0"] visualization = ["networkx>=3.4.2,<4", "matplotlib>=3.8.3,<4"] monitoring = ["sentry-sdk[fastapi]>=2.9.0,<3", "langfuse>=2.32.0,<3"] +nlp = ["nltk>=3.9.1,<4.0.0"] [project.urls] Homepage = "https://www.cognee.ai" From 861ae205d23d5332de2c440920205c4ab0ab6de7 Mon Sep 17 00:00:00 2001 From: vasilije Date: Wed, 27 Aug 2025 21:12:45 +0200 Subject: [PATCH 10/25] removed extra things --- cognee/modules/ingestion/data_types/TextData.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cognee/modules/ingestion/data_types/TextData.py b/cognee/modules/ingestion/data_types/TextData.py index 7c2364a5a..90d5335b5 100644 --- a/cognee/modules/ingestion/data_types/TextData.py +++ b/cognee/modules/ingestion/data_types/TextData.py @@ -1,7 +1,6 @@ from typing import BinaryIO from contextlib import asynccontextmanager import hashlib -from cognee.infrastructure.data.utils.extract_keywords import extract_keywords from .IngestionData import IngestionData From 8f9e289a83c15e2969aed0919ebb1fb83ed6e3b4 Mon Sep 17 00:00:00 2001 From: vasilije Date: Thu, 28 Aug 2025 08:10:25 +0200 Subject: [PATCH 11/25] added baml test fix and format --- .github/workflows/basic_tests.yml | 3 +++ cognee/infrastructure/data/utils/extract_keywords.py | 1 - cognee/shared/utils.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/basic_tests.yml b/.github/workflows/basic_tests.yml index e2264da3d..b6da0d295 100644 --- a/.github/workflows/basic_tests.yml +++ b/.github/workflows/basic_tests.yml @@ -189,6 +189,9 @@ jobs: with: python-version: ${{ inputs.python-version }} + - name: Install BAML Dependencies + run: uv add "cognee[baml]" + - name: Run Simple Examples run: uv run python ./examples/python/simple_example.py diff --git a/cognee/infrastructure/data/utils/extract_keywords.py b/cognee/infrastructure/data/utils/extract_keywords.py index 811999618..c3e47c4c8 100644 --- a/cognee/infrastructure/data/utils/extract_keywords.py +++ b/cognee/infrastructure/data/utils/extract_keywords.py @@ -1,4 +1,3 @@ - from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index df141f2de..16d0961cc 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -20,8 +20,10 @@ proxy_url = "https://test.prometh.ai" def get_entities(tagged_tokens): try: import nltk + nltk.download("maxent_ne_chunker", quiet=True) from nltk.chunk import ne_chunk + return ne_chunk(tagged_tokens) except ImportError: raise ImportError( From 0fd1dae448a29f7e777229963d6e31d4a3e4a92b Mon Sep 17 00:00:00 2001 From: vasilije Date: Tue, 2 Sep 2025 09:31:28 +0200 
Subject: [PATCH 12/25] reverted some changes and removed some upper bounds

---
 pyproject.toml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e71393b21..6667819c8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
     "Operating System :: Microsoft :: Windows",
 ]
 dependencies = [
-    "openai>=1.80.1,<1.99.9",
+    "openai>=1.80.1",
     "python-dotenv>=1.0.1,<2.0.0",
     "pydantic>=2.10.5,<3.0.0",
     "pydantic-settings>=2.2.1,<3",
@@ -29,7 +29,7 @@ dependencies = [
     "sqlalchemy>=2.0.39,<3.0.0",
     "aiosqlite>=0.20.0,<1.0.0",
     "tiktoken>=0.8.0,<1.0.0",
-    "litellm>=1.71.0, <2.0.0",
+    "litellm>=1.76.0",
     "instructor>=1.9.1,<2.0.0",
     "filetype>=1.2.0,<2.0.0",
     "aiohttp>=3.11.14,<4.0.0",
@@ -47,6 +47,10 @@ dependencies = [
     "pylance>=0.22.0,<1.0.0",
     "kuzu (==0.11.0)",
     "python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
+    "fastembed<=0.6.0 ",
+    "networkx>=3.4.2,<4",
+    "matplotlib>=3.8.3,<4"
+
 ]

 [project.optional-dependencies]

From aa445cfaa7828863d35287c99f7bd2178a2bea70 Mon Sep 17 00:00:00 2001
From: vasilije
Date: Sun, 7 Sep 2025 15:56:11 -0700
Subject: [PATCH 13/25] addressed issues

---
 cognee/eval_framework/modal_eval_dashboard.py |  2 +-
 .../data/utils/extract_keywords.py            | 45 -------------------
 cognee/infrastructure/llm/config.py           |  7 +++
 .../baml_src/extraction/extract_summary.py    | 10 -----
 .../knowledge_graph/extract_content_graph.py  |  5 ---
 cognee/shared/utils.py                        | 40 -----------------
 pyproject.toml                                | 11 ++---
 7 files changed, 11 insertions(+), 109 deletions(-)
 delete mode 100644 cognee/infrastructure/data/utils/extract_keywords.py

diff --git a/cognee/eval_framework/modal_eval_dashboard.py b/cognee/eval_framework/modal_eval_dashboard.py
index 9ff6f543c..e2ed8e453 100644
--- a/cognee/eval_framework/modal_eval_dashboard.py
+++ b/cognee/eval_framework/modal_eval_dashboard.py
@@ -16,7 +16,7 @@ metrics_volume = modal.Volume.from_name("evaluation_dashboard_results", create_i
 
 image = (
     modal.Image.debian_slim(python_version="3.11")
-    .pip_install("streamlit", "plotly")
+    .pip_install("streamlit", "pandas", "plotly")
     .add_local_file(__file__, "/root/serve_dashboard.py")
 )

diff --git a/cognee/infrastructure/data/utils/extract_keywords.py b/cognee/infrastructure/data/utils/extract_keywords.py
deleted file mode 100644
index c3e47c4c8..000000000
--- a/cognee/infrastructure/data/utils/extract_keywords.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError
-
-
-# def extract_keywords(text: str) -> list[str]:
-# """
-# Extract keywords from the provided text string.
-
-# This function raises an KeyWordExtractionError if the input text is empty. It processes the
-# text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
-# relevant keywords based on their frequency. The function returns a list of up to 15
-# keywords, each having more than 3 characters.
-
-# Parameters:
-# -----------
-
-# - text (str): The input text from which to extract keywords.
-
-# Returns:
-# --------
-
-# - list[str]: A list of keywords extracted from the text, containing up to 15 nouns
-# with more than 3 characters.
-# """ -# if len(text) == 0: -# raise KeywordExtractionError() - -# tags = extract_pos_tags(text) -# nouns = [word for (word, tag) in tags if tag == "NN"] - -# vectorizer = TfidfVectorizer() -# tfidf = vectorizer.fit_transform(nouns) - -# top_nouns = sorted( -# vectorizer.vocabulary_, key=lambda x: tfidf[0, vectorizer.vocabulary_[x]], reverse=True -# ) - -# keywords = [] - -# for word in top_nouns: -# if len(word) > 3: -# keywords.append(word) -# if len(keywords) >= 15: -# break - -# return keywords diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index c5240935c..9e5120456 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -75,6 +75,13 @@ class LLMConfig(BaseSettings): def model_post_init(self, __context) -> None: """Initialize the BAML registry after the model is created.""" + # Check if BAML is selected as structured output framework but not available + if self.structured_output_framework == "baml" and ClientRegistry is None: + raise ImportError( + "BAML is selected as structured output framework but not available. " + "Please install with 'pip install cognee[baml]' to use BAML extraction features." + ) + if ClientRegistry is not None: self.baml_registry = ClientRegistry() self.baml_registry.add_llm_client( diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py index 89889d294..697a52a45 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py @@ -37,11 +37,6 @@ async def extract_summary(content: str, response_model: Type[BaseModel]): """ config = get_llm_config() - if config.baml_registry is None: - raise ImportError( - "BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features." - ) - # Use BAML's SummarizeContent function summary_result = await b.SummarizeContent( content, baml_options={"client_registry": config.baml_registry} @@ -82,11 +77,6 @@ async def extract_code_summary(content: str): try: config = get_llm_config() - if config.baml_registry is None: - raise ImportError( - "BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features." - ) - result = await b.SummarizeCode( content, baml_options={"client_registry": config.baml_registry} ) diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py index f87d87d1b..abff07e09 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py @@ -16,11 +16,6 @@ async def extract_content_graph( get_logger(level="INFO") - if config.baml_registry is None: - raise ImportError( - "BAML is not available. Please install with 'pip install cognee[baml]' to use BAML extraction features." 
- ) - # if response_model: # # tb = TypeBuilder() # # country = tb.union \ diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index 16d0961cc..3071a82cb 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -17,46 +17,6 @@ from cognee.infrastructure.databases.graph import get_graph_engine proxy_url = "https://test.prometh.ai" -def get_entities(tagged_tokens): - try: - import nltk - - nltk.download("maxent_ne_chunker", quiet=True) - from nltk.chunk import ne_chunk - - return ne_chunk(tagged_tokens) - except ImportError: - raise ImportError( - "NLTK is required for entity extraction. Install with 'pip install cognee[nlp]' to use this feature." - ) - - -def extract_pos_tags(sentence): - """Extract Part-of-Speech (POS) tags for words in a sentence.""" - try: - import nltk - - # Ensure that the necessary NLTK resources are downloaded - nltk.download("words", quiet=True) - nltk.download("punkt", quiet=True) - nltk.download("averaged_perceptron_tagger", quiet=True) - - from nltk.tag import pos_tag - from nltk.tokenize import word_tokenize - - # Tokenize the sentence into words - tokens = word_tokenize(sentence) - - # Tag each word with its corresponding POS tag - pos_tags = pos_tag(tokens) - - return pos_tags - except ImportError: - raise ImportError( - "NLTK is required for POS tagging. Install with 'pip install cognee[nlp]' to use this feature." - ) - - def get_anonymous_id(): """Creates or reads a anonymous user id""" tracking_id = os.getenv("TRACKING_ID", None) diff --git a/pyproject.toml b/pyproject.toml index 6667819c8..2a4c36132 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,8 @@ dependencies = [ "python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows "fastembed<=0.6.0 ", "networkx>=3.4.2,<4", - "matplotlib>=3.8.3,<4" + "matplotlib>=3.8.3,<4", + "baml-py>=0.201.0" ] @@ -66,18 +67,12 @@ distributed = [ # Database backends neo4j = ["neo4j>=5.28.0,<6"] neptune = ["langchain_aws>=0.2.22"] -# PostgreSQL support (binary - no compilation required) postgres = [ + "psycopg2>=2.9.10,<3", "psycopg2-binary>=2.9.10,<3.0.0", # Pre-compiled binary, no PostgreSQL headers needed "pgvector>=0.3.5,<0.4", "asyncpg>=0.30.0,<1.0.0", ] -# PostgreSQL support (source - requires PostgreSQL development headers) -postgres-source = [ - "psycopg2>=2.9.10,<3 ; platform_system != 'Windows'", # Requires libpq-dev, build tools - "pgvector>=0.3.5,<0.4", - "asyncpg>=0.30.0,<1.0.0", -] notebook = ["notebook>=7.1.0,<8"] langchain = [ "langsmith>=0.2.3,<1.0.0", From 356e685563001bdd90aca9529deb36c0d13f70a6 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 7 Sep 2025 16:03:39 -0700 Subject: [PATCH 14/25] adding info --- cognee/infrastructure/llm/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index 9e5120456..4281921f7 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -1,5 +1,5 @@ import os -from typing import Optional, ClassVar +from typing import Optional, ClassVar, Any from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic import model_validator @@ -69,7 +69,7 @@ class LLMConfig(BaseSettings): fallback_endpoint: str = "" fallback_model: str = "" - baml_registry: ClassVar = None + baml_registry: Optional[Any] = None model_config = SettingsConfigDict(env_file=".env", extra="allow") From bb8b47bf344ff65b8e3f7e67010d05f3f3ba4841 Mon Sep 17 00:00:00 2001 From: 
vasilije Date: Sun, 7 Sep 2025 16:37:39 -0700 Subject: [PATCH 15/25] add fix --- .github/workflows/basic_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/basic_tests.yml b/.github/workflows/basic_tests.yml index b6da0d295..bfe15a6cf 100644 --- a/.github/workflows/basic_tests.yml +++ b/.github/workflows/basic_tests.yml @@ -190,7 +190,7 @@ jobs: python-version: ${{ inputs.python-version }} - name: Install BAML Dependencies - run: uv add "cognee[baml]" + run: uv add "baml-py>=0.201.0,<0.202.0" - name: Run Simple Examples run: uv run python ./examples/python/simple_example.py From b230247c25ceb9812d9a0f1b694109158ab6e697 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 7 Sep 2025 16:50:39 -0700 Subject: [PATCH 16/25] added s3 as optional --- .../files/storage/S3FileStorage.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/cognee/infrastructure/files/storage/S3FileStorage.py b/cognee/infrastructure/files/storage/S3FileStorage.py index 6218d6240..0b95ca0cf 100644 --- a/cognee/infrastructure/files/storage/S3FileStorage.py +++ b/cognee/infrastructure/files/storage/S3FileStorage.py @@ -1,6 +1,5 @@ import os -import s3fs -from typing import BinaryIO, Union +from typing import BinaryIO, Union, TYPE_CHECKING from contextlib import asynccontextmanager from cognee.infrastructure.files.storage.s3_config import get_s3_config @@ -8,17 +7,27 @@ from cognee.infrastructure.utils.run_async import run_async from cognee.infrastructure.files.storage.FileBufferedReader import FileBufferedReader from .storage import Storage +if TYPE_CHECKING: + import s3fs + class S3FileStorage(Storage): """ - Manage local file storage operations such as storing, retrieving, and managing files on - the filesystem. + Manage S3 file storage operations such as storing, retrieving, and managing files on + S3-compatible storage. """ storage_path: str - s3: s3fs.S3FileSystem + s3: "s3fs.S3FileSystem" def __init__(self, storage_path: str): + try: + import s3fs + except ImportError: + raise ImportError( + "s3fs is required for S3FileStorage. 
Install it with: pip install cognee[aws]" + ) + self.storage_path = storage_path s3_config = get_s3_config() if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: From d2d0d0de4ed65f671f11d999396b4f9f093e622c Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 25 Sep 2025 13:32:09 +0200 Subject: [PATCH 17/25] refactor: install cognee defined baml version for CI/CD --- .github/actions/cognee_setup/action.yml | 4 ++-- .github/workflows/basic_tests.yml | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml index 9ae669b53..77c5b3b1f 100644 --- a/.github/actions/cognee_setup/action.yml +++ b/.github/actions/cognee_setup/action.yml @@ -24,7 +24,7 @@ runs: uses: astral-sh/setup-uv@v4 with: enable-cache: true - + - name: Rebuild uv lockfile shell: bash run: | @@ -41,4 +41,4 @@ runs: EXTRA_ARGS="$EXTRA_ARGS --extra $extra" done fi - uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS + uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra baml --extra neo4j $EXTRA_ARGS diff --git a/.github/workflows/basic_tests.yml b/.github/workflows/basic_tests.yml index bfe15a6cf..e2264da3d 100644 --- a/.github/workflows/basic_tests.yml +++ b/.github/workflows/basic_tests.yml @@ -189,9 +189,6 @@ jobs: with: python-version: ${{ inputs.python-version }} - - name: Install BAML Dependencies - run: uv add "baml-py>=0.201.0,<0.202.0" - - name: Run Simple Examples run: uv run python ./examples/python/simple_example.py From ca2e63bd84ed9c6acb1234f45785a78385ced467 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 25 Sep 2025 13:49:04 +0200 Subject: [PATCH 18/25] refactor: Move postgres handling to database creation time --- .../databases/vector/create_vector_engine.py | 7 +++++- .../vector/pgvector/PGVectorAdapter.py | 24 +++---------------- cognee/infrastructure/llm/config.py | 6 ++--- .../extraction/acreate_structured_output.py | 3 ++- 4 files changed, 14 insertions(+), 26 deletions(-) diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 77bf7d83f..9432cb296 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -66,7 +66,12 @@ def create_vector_engine( f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}" ) - from .pgvector.PGVectorAdapter import PGVectorAdapter + try: + from .pgvector.PGVectorAdapter import PGVectorAdapter + except ImportError: + raise ImportError( + "PostgreSQL dependencies are not installed. Please install with 'pip install cognee[postgres]' or 'pip install cognee[postgres-binary]' to use PGVector functionality." 
+ ) return PGVectorAdapter( connection_string, diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index b2e2bf8c7..d20d7d519 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -7,20 +7,7 @@ from sqlalchemy import JSON, Column, Table, select, delete, MetaData from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker from sqlalchemy.exc import ProgrammingError from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential - -try: - from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError -except ImportError: - # PostgreSQL dependencies not installed, define dummy exceptions - class DeadlockDetectedError(Exception): - pass - - class DuplicateTableError(Exception): - pass - - class UniqueViolationError(Exception): - pass - +from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError from cognee.shared.logging_utils import get_logger from cognee.infrastructure.engine import DataPoint @@ -82,14 +69,9 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): # Has to be imported at class level # Functions reading tables from database need to know what a Vector column type is - try: - from pgvector.sqlalchemy import Vector + from pgvector.sqlalchemy import Vector - self.Vector = Vector - except ImportError: - raise ImportError( - "PostgreSQL dependencies are not installed. Please install with 'pip install cognee[postgres]' or 'pip install cognee[postgres-binary]' to use PGVector functionality." - ) + self.Vector = Vector async def embed_data(self, data: list[str]) -> list[list[float]]: """ diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index 6c4047847..3c3ff00b0 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -76,14 +76,14 @@ class LLMConfig(BaseSettings): def model_post_init(self, __context) -> None: """Initialize the BAML registry after the model is created.""" # Check if BAML is selected as structured output framework but not available - if self.structured_output_framework == "baml" and ClientRegistry is None: + if self.structured_output_framework.lower() == "baml" and ClientRegistry is None: raise ImportError( "BAML is selected as structured output framework but not available. " "Please install with 'pip install cognee[baml]' to use BAML extraction features." 
) + elif self.structured_output_framework.lower() == "baml" and ClientRegistry is not None: + self.baml_registry = ClientRegistry() - if ClientRegistry is not None: - LLMConfig.baml_registry = ClientRegistry() raw_options = { "model": self.baml_llm_model, "temperature": self.baml_llm_temperature, diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py index 8efcce23d..55632be8e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py @@ -53,7 +53,8 @@ async def acreate_structured_output( # Transform BAML response to proper pydantic reponse model if response_model is str: - return str(result) + # Note: when a response model is set to string in python result is stored in text property in the BAML response model + return str(result.text) return response_model.model_validate(result.dict()) From d1724c710b84ee90bf20ac03218b739662dd39d7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 25 Sep 2025 13:55:01 +0200 Subject: [PATCH 19/25] refactor: Add proper pip install command for optional extras --- cognee/api/client.py | 2 +- cognee/eval_framework/modal_eval_dashboard.py | 2 +- .../databases/vector/create_vector_engine.py | 2 +- .../files/storage/S3FileStorage.py | 2 +- .../files/utils/open_data_file.py | 2 +- .../cognee_network_visualization.py | 23 ++++++++----------- 6 files changed, 14 insertions(+), 19 deletions(-) diff --git a/cognee/api/client.py b/cognee/api/client.py index 978bc1929..f129ff2ec 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -51,7 +51,7 @@ if os.getenv("ENV", "prod") == "prod": ) except ImportError: logger.info( - "Sentry SDK not available. Install with 'pip install cognee[monitoring]' to enable error monitoring." + "Sentry SDK not available. Install with 'pip install cognee\"[monitoring]\"' to enable error monitoring." ) diff --git a/cognee/eval_framework/modal_eval_dashboard.py b/cognee/eval_framework/modal_eval_dashboard.py index 6fbe45f8a..9b1147528 100644 --- a/cognee/eval_framework/modal_eval_dashboard.py +++ b/cognee/eval_framework/modal_eval_dashboard.py @@ -82,7 +82,7 @@ def main(): import pandas as pd except ImportError: st.error( - "Pandas is required for the evaluation dashboard. Install with 'pip install cognee[evals]' to use this feature." + "Pandas is required for the evaluation dashboard. Install with 'pip install cognee\"[evals]\"' to use this feature." ) return diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 9432cb296..5c4e93359 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -70,7 +70,7 @@ def create_vector_engine( from .pgvector.PGVectorAdapter import PGVectorAdapter except ImportError: raise ImportError( - "PostgreSQL dependencies are not installed. Please install with 'pip install cognee[postgres]' or 'pip install cognee[postgres-binary]' to use PGVector functionality." + "PostgreSQL dependencies are not installed. Please install with 'pip install cognee\"[postgres]\"' or 'pip install cognee\"[postgres-binary]\"' to use PGVector functionality." 
) return PGVectorAdapter( diff --git a/cognee/infrastructure/files/storage/S3FileStorage.py b/cognee/infrastructure/files/storage/S3FileStorage.py index 789018d46..4c986bbe9 100644 --- a/cognee/infrastructure/files/storage/S3FileStorage.py +++ b/cognee/infrastructure/files/storage/S3FileStorage.py @@ -25,7 +25,7 @@ class S3FileStorage(Storage): import s3fs except ImportError: raise ImportError( - "s3fs is required for S3FileStorage. Install it with: pip install cognee[aws]" + 's3fs is required for S3FileStorage. Install it with: pip install cognee"[aws]"' ) self.storage_path = storage_path diff --git a/cognee/infrastructure/files/utils/open_data_file.py b/cognee/infrastructure/files/utils/open_data_file.py index fcfca4161..ad8a6b63e 100644 --- a/cognee/infrastructure/files/utils/open_data_file.py +++ b/cognee/infrastructure/files/utils/open_data_file.py @@ -26,7 +26,7 @@ async def open_data_file(file_path: str, mode: str = "rb", encoding: str = None, from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage except ImportError: raise ImportError( - "S3 dependencies are not installed. Please install with 'pip install cognee[aws]' to use S3 functionality." + "S3 dependencies are not installed. Please install with 'pip install cognee\"[aws]\"' to use S3 functionality." ) normalized_url = get_data_file_path(file_path) diff --git a/cognee/modules/visualization/cognee_network_visualization.py b/cognee/modules/visualization/cognee_network_visualization.py index ef9b2f126..bbdbc0019 100644 --- a/cognee/modules/visualization/cognee_network_visualization.py +++ b/cognee/modules/visualization/cognee_network_visualization.py @@ -8,12 +8,7 @@ logger = get_logger() async def cognee_network_visualization(graph_data, destination_file_path: str = None): - try: - import networkx - except ImportError: - raise ImportError( - "NetworkX is not installed. Please install with 'pip install cognee[visualization]' to use graph visualization features." - ) + import networkx nodes_data, edges_data = graph_data @@ -110,7 +105,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = .nodes circle { stroke: white; stroke-width: 0.5px; filter: drop-shadow(0 0 5px rgba(255,255,255,0.3)); } .node-label { font-size: 5px; font-weight: bold; fill: white; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; } .edge-label { font-size: 3px; fill: rgba(255, 255, 255, 0.7); text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; } - + .tooltip { position: absolute; text-align: left; @@ -172,7 +167,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = // Create tooltip content for edge var content = "Edge Information
"; content += "Relationship: " + d.relation + "
"; - + // Show all weights if (d.all_weights && Object.keys(d.all_weights).length > 0) { content += "Weights:
"; @@ -182,23 +177,23 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = } else if (d.weight !== null && d.weight !== undefined) { content += "Weight: " + d.weight + "
"; } - + if (d.relationship_type) { content += "Type: " + d.relationship_type + "
"; } - + // Add other edge properties if (d.edge_info) { Object.keys(d.edge_info).forEach(function(key) { - if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' && - key !== 'source_node_id' && key !== 'target_node_id' && - key !== 'relationship_name' && key !== 'updated_at' && + if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' && + key !== 'source_node_id' && key !== 'target_node_id' && + key !== 'relationship_name' && key !== 'updated_at' && !key.startsWith('weight_')) { content += key + ": " + d.edge_info[key] + "
"; } }); } - + tooltip.html(content) .style("left", (d3.event.pageX + 10) + "px") .style("top", (d3.event.pageY - 10) + "px") From 8265ec03341cad520232c87ba1f99f9fd2cabe28 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 25 Sep 2025 13:57:14 +0200 Subject: [PATCH 20/25] refactor: Add missing install info --- .../databases/relational/create_relational_engine.py | 2 +- cognee/infrastructure/llm/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/infrastructure/databases/relational/create_relational_engine.py b/cognee/infrastructure/databases/relational/create_relational_engine.py index 4f117bf4c..deaeaa2da 100644 --- a/cognee/infrastructure/databases/relational/create_relational_engine.py +++ b/cognee/infrastructure/databases/relational/create_relational_engine.py @@ -48,7 +48,7 @@ def create_relational_engine( ) except ImportError: raise ImportError( - "PostgreSQL dependencies are not installed. Please install with 'pip install cognee[postgres]' or 'pip install cognee[postgres-binary]' to use PostgreSQL functionality." + "PostgreSQL dependencies are not installed. Please install with 'pip install cognee\"[postgres]\"' or 'pip install cognee\"[postgres-binary]\"' to use PostgreSQL functionality." ) return SQLAlchemyAdapter(connection_string) diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index 3c3ff00b0..6658a6251 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -79,7 +79,7 @@ class LLMConfig(BaseSettings): if self.structured_output_framework.lower() == "baml" and ClientRegistry is None: raise ImportError( "BAML is selected as structured output framework but not available. " - "Please install with 'pip install cognee[baml]' to use BAML extraction features." + "Please install with 'pip install cognee\"[baml]\"' to use BAML extraction features." 
) elif self.structured_output_framework.lower() == "baml" and ClientRegistry is not None: self.baml_registry = ClientRegistry() From 6f8f9bf7de3b3b16d57d3aff1c5a67bc0aa2e78b Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 25 Sep 2025 13:58:52 +0200 Subject: [PATCH 21/25] refactor: make comment more understandable --- .../baml/baml_src/extraction/acreate_structured_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py index 55632be8e..6ef27e51d 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py @@ -53,7 +53,7 @@ async def acreate_structured_output( # Transform BAML response to proper pydantic reponse model if response_model is str: - # Note: when a response model is set to string in python result is stored in text property in the BAML response model + # Note: when a response model is set to string in python, result is stored in text property in the BAML response model return str(result.text) return response_model.model_validate(result.dict()) From 61ef6fa444328ef9f749013333a383908c0c41a7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 25 Sep 2025 15:26:10 +0200 Subject: [PATCH 22/25] chore: Update pyproject --- poetry.lock | 52 ++++++++++++++++++++++++++++++++++++-------------- pyproject.toml | 6 ++++-- uv.lock | 24 +++++++++++++++++++---- 3 files changed, 62 insertions(+), 20 deletions(-) diff --git a/poetry.lock b/poetry.lock index 1e42dc88d..f0f0c4ee4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1200,10 +1200,10 @@ test = ["pytest"] name = "contourpy" version = "1.3.2" description = "Python library for calculating contours of 2D quadrilateral grids" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version == \"3.10\" and extra == \"evals\"" files = [ {file = "contourpy-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba38e3f9f330af820c4b27ceb4b9c7feee5fe0493ea53a8720f4792667465934"}, {file = "contourpy-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc41ba0714aa2968d1f8674ec97504a8f7e334f48eeacebcaa6256213acb0989"}, @@ -1278,10 +1278,10 @@ test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist" name = "contourpy" version = "1.3.3" description = "Python library for calculating contours of 2D quadrilateral grids" -optional = false +optional = true python-versions = ">=3.11" groups = ["main"] -markers = "python_version >= \"3.11\"" +markers = "python_version >= \"3.11\" and extra == \"evals\"" files = [ {file = "contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1"}, {file = "contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381"}, @@ -1582,9 +1582,10 @@ files = [ name = "cycler" version = "0.12.1" description = "Composable style cycles" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"evals\"" files = [ {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, 
{file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, @@ -2303,9 +2304,10 @@ files = [ name = "fonttools" version = "4.60.0" description = "Tools to manipulate font files" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"evals\"" files = [ {file = "fonttools-4.60.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:151282a235c36024168c21c02193e939e8b28c73d5fa0b36ae1072671d8fa134"}, {file = "fonttools-4.60.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3f32cc42d485d9b1546463b9a7a92bdbde8aef90bac3602503e04c2ddb27e164"}, @@ -3069,6 +3071,28 @@ multidict = "*" [package.extras] protobuf = ["protobuf (>=3.20.0)"] +[[package]] +name = "gunicorn" +version = "23.0.0" +description = "WSGI HTTP Server for UNIX" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d"}, + {file = "gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +eventlet = ["eventlet (>=0.24.1,!=0.36.0)"] +gevent = ["gevent (>=1.4.0)"] +setproctitle = ["setproctitle"] +testing = ["coverage", "eventlet", "gevent", "pytest", "pytest-cov"] +tornado = ["tornado (>=0.2)"] + [[package]] name = "h11" version = "0.16.0" @@ -4209,9 +4233,10 @@ test = ["hatch", "ipykernel", "openapi-core (>=0.18.0,<0.19.0)", "openapi-spec-v name = "kiwisolver" version = "1.4.9" description = "A fast implementation of the Cassowary constraint solver" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] +markers = "extra == \"evals\"" files = [ {file = "kiwisolver-1.4.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b4b4d74bda2b8ebf4da5bd42af11d02d04428b2c32846e4c2c93219df8a7987b"}, {file = "kiwisolver-1.4.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fb3b8132019ea572f4611d770991000d7f58127560c4889729248eb5852a102f"}, @@ -5127,9 +5152,10 @@ tests = ["pytest", "simplejson"] name = "matplotlib" version = "3.10.6" description = "Python plotting package" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] +markers = "extra == \"evals\"" files = [ {file = "matplotlib-3.10.6-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:bc7316c306d97463a9866b89d5cc217824e799fa0de346c8f68f4f3d27c8693d"}, {file = "matplotlib-3.10.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d00932b0d160ef03f59f9c0e16d1e3ac89646f7785165ce6ad40c842db16cc2e"}, @@ -10788,10 +10814,9 @@ zstd = ["zstandard (>=0.18.0)"] name = "uvicorn" version = "0.37.0" description = "The lightning-fast ASGI server." 
-optional = true
+optional = false
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"chromadb\""
 files = [
     {file = "uvicorn-0.37.0-py3-none-any.whl", hash = "sha256:913b2b88672343739927ce381ff9e2ad62541f9f8289664fa1d1d3803fa2ce6c"},
     {file = "uvicorn-0.37.0.tar.gz", hash = "sha256:4115c8add6d3fd536c8ee77f0e14a7fd2ebba939fed9b02583a97f80648f9e13"},
@@ -11112,10 +11137,9 @@ test = ["websockets"]
 name = "websockets"
 version = "15.0.1"
 description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
-optional = true
+optional = false
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"deepeval\" or extra == \"chromadb\""
 files = [
     {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b"},
     {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205"},
@@ -11631,7 +11655,7 @@ dev = ["coverage", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plug
 distributed = ["modal"]
 dlt = ["dlt"]
 docs = ["unstructured"]
-evals = ["gdown", "pandas", "plotly"]
+evals = ["gdown", "matplotlib", "pandas", "plotly"]
 falkordb = ["falkordb"]
 gemini = ["google-generativeai"]
 graphiti = ["graphiti-core"]
@@ -11652,4 +11676,4 @@ posthog = ["posthog"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<=3.13"
-content-hash = "320addf9d82b65c74ee256b7a50be3832473661cedfe9d3f1459b125ee8f799c"
+content-hash = "e31994b59356d0bbb7c40dcd5e8ea19cc453c3f1303a7e8f82eeadd4595dbfb4"
diff --git a/pyproject.toml b/pyproject.toml
index 18b481db5..9bcbc8b5d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,12 +48,13 @@ dependencies = [
     "structlog>=25.2.0,<26",
     "pympler>=1.1,<2.0.0",
     "onnxruntime>=1.0.0,<2.0.0",
-    "pylance>=0.22.0,<1.0.0",
     "kuzu (==0.11.0)",
     "python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
     "fastembed<=0.6.0 ",
     "networkx>=3.4.2,<4",
-    "matplotlib>=3.8.3,<4",
+    "uvicorn>=0.34.0,<1.0.0",
+    "gunicorn>=20.1.0,<24",
+    "websockets>=15.0.1,<16.0.0"
 ]
 
 
@@ -106,6 +107,7 @@ evals = [
     "plotly>=6.0.0,<7",
     "gdown>=5.2.0,<6",
     "pandas>=2.2.2,<3.0.0",
+    "matplotlib>=3.8.3,<4",
 ]
 
 graphiti = ["graphiti-core>=0.7.0,<0.8"]
diff --git a/uv.lock b/uv.lock
index 559dea22d..32d93f815 100644
--- a/uv.lock
+++ b/uv.lock
@@ -826,13 +826,13 @@ dependencies = [
     { name = "fastapi-users", extra = ["sqlalchemy"] },
     { name = "fastembed" },
     { name = "filetype" },
+    { name = "gunicorn" },
     { name = "instructor" },
     { name = "jinja2" },
     { name = "kuzu" },
     { name = "lancedb" },
     { name = "limits" },
     { name = "litellm" },
-    { name = "matplotlib" },
     { name = "nbformat" },
     { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
     { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -843,7 +843,6 @@ dependencies = [
     { name = "pre-commit" },
     { name = "pydantic" },
     { name = "pydantic-settings" },
-    { name = "pylance" },
     { name = "pympler" },
     { name = "pypdf" },
     { name = "python-dotenv" },
@@ -854,6 +853,8 @@ dependencies = [
     { name = "structlog" },
     { name = "tiktoken" },
     { name = "typing-extensions" },
+    { name = "uvicorn" },
+    { name = "websockets" },
 ]
 
 [package.optional-dependencies]
@@ -909,6 +910,7 @@ docs = [
 ]
 evals = [
     { name = "gdown" },
+    { name = "matplotlib" },
     { name = "pandas" },
     { name = "plotly" },
 ]
@@ -994,6 +996,7 @@ requires-dist = [
     { name = "google-generativeai", marker = "extra == 'gemini'", specifier = ">=0.8.4,<0.9" },
     { name = "graphiti-core", marker = "extra == 'graphiti'", specifier = ">=0.7.0,<0.8" },
     { name = "groq", marker = "extra == 'groq'", specifier = ">=0.8.0,<1.0.0" },
+    { name = "gunicorn", specifier = ">=20.1.0,<24" },
     { name = "instructor", specifier = ">=1.9.1,<2.0.0" },
     { name = "jinja2", specifier = ">=3.1.3,<4" },
     { name = "kuzu", specifier = "==0.11.0" },
@@ -1005,7 +1008,7 @@ requires-dist = [
     { name = "limits", specifier = ">=4.4.1,<5" },
     { name = "litellm", specifier = ">=1.76.0" },
     { name = "llama-index-core", marker = "extra == 'llama-index'", specifier = ">=0.12.11,<0.13" },
-    { name = "matplotlib", specifier = ">=3.8.3,<4" },
+    { name = "matplotlib", marker = "extra == 'evals'", specifier = ">=3.8.3,<4" },
     { name = "mistral-common", marker = "extra == 'mistral'", specifier = ">=1.5.2,<2" },
     { name = "mkdocs-material", marker = "extra == 'dev'", specifier = ">=9.5.42,<10" },
     { name = "mkdocs-minify-plugin", marker = "extra == 'dev'", specifier = ">=0.8.0,<0.9" },
@@ -1030,7 +1033,6 @@ requires-dist = [
     { name = "psycopg2-binary", marker = "extra == 'postgres-binary'", specifier = ">=2.9.10,<3.0.0" },
     { name = "pydantic", specifier = ">=2.10.5,<3.0.0" },
     { name = "pydantic-settings", specifier = ">=2.2.1,<3" },
-    { name = "pylance", specifier = ">=0.22.0,<1.0.0" },
     { name = "pylint", marker = "extra == 'dev'", specifier = ">=3.0.3,<4" },
     { name = "pympler", specifier = ">=1.1,<2.0.0" },
     { name = "pypdf", specifier = ">=4.1.0,<7.0.0" },
@@ -1056,6 +1058,8 @@ requires-dist = [
     { name = "tweepy", marker = "extra == 'dev'", specifier = ">=4.14.0,<5.0.0" },
     { name = "typing-extensions", specifier = ">=4.12.2,<5.0.0" },
     { name = "unstructured", extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], marker = "extra == 'docs'", specifier = ">=0.18.1,<19" },
+    { name = "uvicorn", specifier = ">=0.34.0,<1.0.0" },
+    { name = "websockets", specifier = ">=15.0.1,<16.0.0" },
 ]
 provides-extras = ["api", "distributed", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "graphiti", "aws", "dlt", "baml", "dev", "debug", "monitoring"]
@@ -2364,6 +2368,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/03/8b/ad381ec1b8195fa4a9a693cb8087e031b99530c0d6b8ad036dcb99e144c4/grpclib-0.4.8-py3-none-any.whl", hash = "sha256:a5047733a7acc1c1cee6abf3c841c7c6fab67d2844a45a853b113fa2e6cd2654", size = 76311, upload-time = "2025-05-04T16:27:22.818Z" },
 ]
 
+[[package]]
+name = "gunicorn"
+version = "23.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "packaging" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031, upload-time = "2024-08-10T20:25:27.378Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/7d/6dac2a6e1eba33ee43f318edbed4ff29151a49b5d37f080aad1e6469bca4/gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d", size = 85029, upload-time = "2024-08-10T20:25:24.996Z" },
+]
+
 [[package]]
 name = "h11"
 version = "0.16.0"
From 664459e23978538d411fd45096f3245c373facf6 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Thu, 25 Sep 2025 15:30:27 +0200
Subject: [PATCH 23/25] refactor: Install baml only for BAML test

---
 .github/actions/cognee_setup/action.yml | 2 +-
 .github/workflows/basic_tests.yml       | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml
index 77c5b3b1f..e46a42edb 100644
--- a/.github/actions/cognee_setup/action.yml
+++ b/.github/actions/cognee_setup/action.yml
@@ -41,4 +41,4 @@ runs:
             EXTRA_ARGS="$EXTRA_ARGS --extra $extra"
           done
         fi
-        uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra baml --extra neo4j $EXTRA_ARGS
+        uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS
diff --git a/.github/workflows/basic_tests.yml b/.github/workflows/basic_tests.yml
index e2264da3d..3f3e644a2 100644
--- a/.github/workflows/basic_tests.yml
+++ b/.github/workflows/basic_tests.yml
@@ -188,6 +188,7 @@ jobs:
         uses: ./.github/actions/cognee_setup
         with:
           python-version: ${{ inputs.python-version }}
+          extra-dependencies: "baml"
 
       - name: Run Simple Examples
         run: uv run python ./examples/python/simple_example.py

From 50032dd13322ebfb0d3f312fbb9f784e80295350 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Thu, 25 Sep 2025 16:02:30 +0200
Subject: [PATCH 24/25] fix: install aws for gh action

---
 .github/workflows/e2e_tests.yml            | 3 ---
 .github/workflows/test_s3_file_storage.yml | 1 +
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
index af3a3ff44..3fe7a7992 100644
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@@ -166,9 +166,6 @@ jobs:
           python-version: '3.11.x'
           extra-dependencies: "aws"
 
-      - name: Dependencies already installed
-        run: echo "Dependencies already installed in setup"
-
       - name: Run S3 Bucket Test
         env:
           ENV: 'dev'
diff --git a/.github/workflows/test_s3_file_storage.yml b/.github/workflows/test_s3_file_storage.yml
index c4866ec2d..a477d8933 100644
--- a/.github/workflows/test_s3_file_storage.yml
+++ b/.github/workflows/test_s3_file_storage.yml
@@ -18,6 +18,7 @@ jobs:
         uses: ./.github/actions/cognee_setup
         with:
           python-version: '3.11.x'
+          extra-dependencies: "aws"
 
       - name: Run S3 File Storage Test
         env:

From 4054307b15a13decb728b76544f15648b2b66b2e Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Thu, 25 Sep 2025 16:03:11 +0200
Subject: [PATCH 25/25] refactor: Remove comment

---
 .../infrastructure/databases/vector/lancedb/LanceDBAdapter.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py
index 7bc3385b9..0c93f81e7 100644
--- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py
+++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py
@@ -247,8 +247,6 @@ class LanceDBAdapter(VectorDBInterface):
 
         result_values = await collection.vector_search(query_vector).limit(limit).to_list()
 
-        # result_values = list(results.to_dict("index").values())
-
         if not result_values:
             return []
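
Notes for reviewers:

PATCH 23/25 moves the baml extra out of the shared setup action and into the one job that exercises BAML code paths. A rough local equivalent of that job's setup plus its smoke test is sketched below; the extras list mirrors the cognee_setup action above, and running from the repository root is assumed:

    # Sync CI's base extras, then opt in to baml only when testing BAML paths
    uv sync --extra api --extra docs --extra evals --extra gemini \
            --extra codegraph --extra ollama --extra dev --extra neo4j \
            --extra baml
    uv run python ./examples/python/simple_example.py

PATCH 22/25 promotes uvicorn, gunicorn, and websockets to core dependencies. A common way to use those pins together is gunicorn managing uvicorn workers; the app path below is a hypothetical placeholder, not taken from these patches, and on newer uvicorn releases the worker class lives in the separate uvicorn-worker package:

    # Hedged serving sketch; replace some_module:app with the real ASGI app path
    gunicorn some_module:app \
             --worker-class uvicorn.workers.UvicornWorker \
             --workers 4 --bind 0.0.0.0:8000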