From ed555a731d2c904a5cf0c24240be565c973ffd88 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:27:45 +0200 Subject: [PATCH 01/51] feat: adds new Exception classes --- cognee/exceptions/__init__.py | 7 +++--- cognee/exceptions/exceptions.py | 43 +++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/cognee/exceptions/__init__.py b/cognee/exceptions/__init__.py index d1d4ecbf5..496ad63c5 100644 --- a/cognee/exceptions/__init__.py +++ b/cognee/exceptions/__init__.py @@ -7,8 +7,7 @@ such as service failures, resource conflicts, and invalid operations. from .exceptions import ( CogneeApiError, - ServiceError, - InvalidValueError, - InvalidAttributeError, - CriticalError, + CogneeSystemError, + CogneeValidationError, + CogneeConfigurationError, ) diff --git a/cognee/exceptions/exceptions.py b/cognee/exceptions/exceptions.py index 46e0af1a8..e7eb784b8 100644 --- a/cognee/exceptions/exceptions.py +++ b/cognee/exceptions/exceptions.py @@ -35,37 +35,50 @@ class CogneeApiError(Exception): return f"{self.name}: {self.message} (Status code: {self.status_code})" -class ServiceError(CogneeApiError): - """Failures in external services or APIs, like a database or a third-party service""" +class CogneeSystemError(CogneeApiError): + """System error""" def __init__( self, - message: str = "Service is unavailable.", - name: str = "ServiceError", + message: str = "A system error occurred.", + name: str = "CogneeSystemError", + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + ): + super().__init__(message, name, status_code) + + +class CogneeValidationError(CogneeApiError): + """Validation error""" + + def __init__( + self, + message: str = "A validation error occurred.", + name: str = "CogneeValidationError", status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, ): super().__init__(message, name, status_code) -class InvalidValueError(CogneeApiError): +class 
CogneeConfigurationError(CogneeApiError): + """SystemConfigError""" + def __init__( self, - message: str = "Invalid Value.", - name: str = "InvalidValueError", - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + message: str = "A system configuration error occurred.", + name: str = "CogneeConfigurationError", + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, ): super().__init__(message, name, status_code) -class InvalidAttributeError(CogneeApiError): +class CogneeTransientError(CogneeApiError): + """TransientError""" + def __init__( self, - message: str = "Invalid attribute.", - name: str = "InvalidAttributeError", - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + message: str = "A transient error occurred.", + name: str = "CogneeTransientError", + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, ): super().__init__(message, name, status_code) - -class CriticalError(CogneeApiError): - pass From 0555dd9ddaf48ceec746e65a09adbc748828810c Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:29:16 +0200 Subject: [PATCH 02/51] Update __init__.py --- cognee/exceptions/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cognee/exceptions/__init__.py b/cognee/exceptions/__init__.py index 496ad63c5..2f1589291 100644 --- a/cognee/exceptions/__init__.py +++ b/cognee/exceptions/__init__.py @@ -2,7 +2,7 @@ Custom exceptions for the Cognee API. This module defines a set of exceptions for handling various application errors, -such as service failures, resource conflicts, and invalid operations. 
+such as System, Validation, Configuration or TransientErrors """ from .exceptions import ( @@ -10,4 +10,5 @@ from .exceptions import ( CogneeSystemError, CogneeValidationError, CogneeConfigurationError, + CogneeTransientError, ) From 1b3898dd8b2eb174e8f8e46fe4d4533dce6133f4 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:38:38 +0200 Subject: [PATCH 03/51] feat: updates rel_db exceptions with the new error classes --- .../databases/exceptions/exceptions.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cognee/infrastructure/databases/exceptions/exceptions.py b/cognee/infrastructure/databases/exceptions/exceptions.py index 7a4220358..4aba09d37 100644 --- a/cognee/infrastructure/databases/exceptions/exceptions.py +++ b/cognee/infrastructure/databases/exceptions/exceptions.py @@ -1,13 +1,13 @@ from fastapi import status -from cognee.exceptions import CogneeApiError, CriticalError +from cognee.exceptions import CogneeSystemError, CogneeValidationError, CogneeConfigurationError -class DatabaseNotCreatedError(CriticalError): +class DatabaseNotCreatedError(CogneeSystemError): """ Represents an error indicating that the database has not been created. This error should be raised when an attempt is made to access the database before it has been initialized. - Inherits from CriticalError. Overrides the constructor to include a default message and + Inherits from CogneeSystemError. Overrides the constructor to include a default message and status code. """ @@ -20,10 +20,10 @@ class DatabaseNotCreatedError(CriticalError): super().__init__(message, name, status_code) -class EntityNotFoundError(CogneeApiError): +class EntityNotFoundError(CogneeValidationError): """ Represents an error when a requested entity is not found in the database. This class - inherits from CogneeApiError. + inherits from CogneeValidationError. 
Public methods: @@ -49,11 +49,11 @@ class EntityNotFoundError(CogneeApiError): # super().__init__(message, name, status_code) :TODO: This is not an error anymore with the dynamic exception handling therefore we shouldn't log error -class EntityAlreadyExistsError(CogneeApiError): +class EntityAlreadyExistsError(CogneeValidationError): """ Represents an error when an entity creation is attempted but the entity already exists. - This class is derived from CogneeApiError and is used to signal a conflict in operations + This class is derived from CogneeValidationError and is used to signal a conflict in operations involving resource creation. """ @@ -66,11 +66,11 @@ class EntityAlreadyExistsError(CogneeApiError): super().__init__(message, name, status_code) -class NodesetFilterNotSupportedError(CogneeApiError): +class NodesetFilterNotSupportedError(CogneeConfigurationError): """ Raise an exception when a nodeset filter is not supported by the current database. - This exception inherits from `CogneeApiError` and is designed to provide information + This exception inherits from `CogneeConfigurationError` and is designed to provide information about the specific issue of unsupported nodeset filters in the context of graph databases. 
""" From b40dbf3c68a2def746cd83a78e6d61408ef7c502 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:50:24 +0200 Subject: [PATCH 04/51] feat: adds new error classes to litellm instructor --- cognee/infrastructure/llm/exceptions.py | 21 +++++++++++++++++-- .../litellm_instructor/llm/get_llm_client.py | 5 ++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/cognee/infrastructure/llm/exceptions.py b/cognee/infrastructure/llm/exceptions.py index af3aa5832..287820448 100644 --- a/cognee/infrastructure/llm/exceptions.py +++ b/cognee/infrastructure/llm/exceptions.py @@ -1,5 +1,22 @@ -from cognee.exceptions.exceptions import CriticalError +from cognee.exceptions.exceptions import CogneeValidationError -class ContentPolicyFilterError(CriticalError): +class ContentPolicyFilterError(CogneeValidationError): pass + + +class LLMAPIKeyNotSetError(CogneeValidationError): + """ + Raised when the LLM API key is not set in the configuration. + """ + def __init__(self, message: str = "LLM API key is not set."): + super().__init__(message=message, name="LLMAPIKeyNotSetError") + + +class UnsupportedLLMProviderError(CogneeValidationError): + """ + Raised when an unsupported LLM provider is specified in the configuration. 
+ """ + def __init__(self, provider: str): + message = f"Unsupported LLM provider: {provider}" + super().__init__(message=message, name="UnsupportedLLMProviderError") diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index 22d101077..b486fa089 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -2,11 +2,14 @@ from enum import Enum -from cognee.exceptions import InvalidValueError from cognee.infrastructure.llm import get_llm_config from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import ( OllamaAPIAdapter, ) +from cognee.infrastructure.llm.exceptions import ( + LLMAPIKeyNotSetError, + UnsupportedLLMProviderError, +) # Define an Enum for LLM Providers From 623148ec6af1a4848862e20808dd03be2e9b45fa Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:53:29 +0200 Subject: [PATCH 05/51] feat: adds new errors to litellm client --- .../litellm_instructor/llm/get_llm_client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index b486fa089..3006a795b 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -83,7 +83,7 @@ def get_llm_client(): elif provider == LLMProvider.OLLAMA: if llm_config.llm_api_key is None: - raise InvalidValueError(message="LLM API key is not set.") + raise 
LLMAPIKeyNotSetError() from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import ( GenericAPIAdapter, @@ -106,7 +106,7 @@ def get_llm_client(): elif provider == LLMProvider.CUSTOM: if llm_config.llm_api_key is None: - raise InvalidValueError(message="LLM API key is not set.") + raise LLMAPIKeyNotSetError() from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import ( GenericAPIAdapter, @@ -125,7 +125,7 @@ def get_llm_client(): elif provider == LLMProvider.GEMINI: if llm_config.llm_api_key is None: - raise InvalidValueError(message="LLM API key is not set.") + raise LLMAPIKeyNotSetError() from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import ( GeminiAdapter, @@ -141,4 +141,4 @@ def get_llm_client(): ) else: - raise InvalidValueError(message=f"Unsupported LLM provider: {provider}") + raise UnsupportedLLMProviderError(provider) From 7bd2660d080ea36477e02c4eeb585eb29d601717 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:58:32 +0200 Subject: [PATCH 06/51] feat: setting base classes of data exceptions to the new ones --- cognee/modules/data/exceptions/exceptions.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cognee/modules/data/exceptions/exceptions.py b/cognee/modules/data/exceptions/exceptions.py index 214bf5381..4eff8f9b8 100644 --- a/cognee/modules/data/exceptions/exceptions.py +++ b/cognee/modules/data/exceptions/exceptions.py @@ -1,8 +1,11 @@ -from cognee.exceptions import CogneeApiError +from cognee.exceptions import ( + CogneeValidationError, + CogneeConfigurationError, +) from fastapi import status -class UnstructuredLibraryImportError(CogneeApiError): +class UnstructuredLibraryImportError(CogneeConfigurationError): def __init__( self, message: str = "Import error. 
Unstructured library is not installed.", @@ -12,7 +15,7 @@ class UnstructuredLibraryImportError(CogneeApiError): super().__init__(message, name, status_code) -class UnauthorizedDataAccessError(CogneeApiError): +class UnauthorizedDataAccessError(CogneeValidationError): def __init__( self, message: str = "User does not have permission to access this data.", @@ -22,7 +25,7 @@ class UnauthorizedDataAccessError(CogneeApiError): super().__init__(message, name, status_code) -class DatasetNotFoundError(CogneeApiError): +class DatasetNotFoundError(CogneeValidationError): def __init__( self, message: str = "Dataset not found.", @@ -32,7 +35,7 @@ class DatasetNotFoundError(CogneeApiError): super().__init__(message, name, status_code) -class DatasetTypeError(CogneeApiError): +class DatasetTypeError(CogneeValidationError): def __init__( self, message: str = "Dataset type not supported.", From 6870bba5a90285e87f066095a3a878865c343fad Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:03:18 +0200 Subject: [PATCH 07/51] feat: adds new error to delete --- cognee/modules/data/exceptions/exceptions.py | 10 ++++++++++ cognee/modules/data/methods/delete_data.py | 6 ++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cognee/modules/data/exceptions/exceptions.py b/cognee/modules/data/exceptions/exceptions.py index 4eff8f9b8..c85e11cb2 100644 --- a/cognee/modules/data/exceptions/exceptions.py +++ b/cognee/modules/data/exceptions/exceptions.py @@ -43,3 +43,13 @@ class DatasetTypeError(CogneeValidationError): status_code=status.HTTP_400_BAD_REQUEST, ): super().__init__(message, name, status_code) + + +class InvalidAttributeError(CogneeValidationError): + def __init__( + self, + message: str = "The provided data object is missing the required '__tablename__' attribute.", + name: str = "InvalidAttributeError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + super().__init__(message, name, status_code) diff 
--git a/cognee/modules/data/methods/delete_data.py b/cognee/modules/data/methods/delete_data.py index 2d87d73a5..6ec055fcc 100644 --- a/cognee/modules/data/methods/delete_data.py +++ b/cognee/modules/data/methods/delete_data.py @@ -1,4 +1,4 @@ -from cognee.exceptions import InvalidAttributeError +from cognee.modules.data.exceptions.exceptions import InvalidAttributeError from cognee.modules.data.models import Data from cognee.infrastructure.databases.relational import get_relational_engine @@ -13,9 +13,7 @@ async def delete_data(data: Data): ValueError: If the data object is invalid. """ if not hasattr(data, "__tablename__"): - raise InvalidAttributeError( - message="The provided data object is missing the required '__tablename__' attribute." - ) + raise InvalidAttributeError() db_engine = get_relational_engine() From ebd4403c2f74bf6f65f717ad389b89e869a7efc2 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:14:42 +0200 Subject: [PATCH 08/51] adds new error classes to keywordextration error and defines new error --- .../data/exceptions/__init__.py | 9 ++++++++ .../data/exceptions/exceptions.py | 22 +++++++++++++++++++ .../data/utils/extract_keywords.py | 4 ++-- 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 cognee/infrastructure/data/exceptions/__init__.py create mode 100644 cognee/infrastructure/data/exceptions/exceptions.py diff --git a/cognee/infrastructure/data/exceptions/__init__.py b/cognee/infrastructure/data/exceptions/__init__.py new file mode 100644 index 000000000..6735200ed --- /dev/null +++ b/cognee/infrastructure/data/exceptions/__init__.py @@ -0,0 +1,9 @@ +""" +Custom exceptions for the Cognee API. 
+ +This module defines a set of exceptions for handling various data errors +""" + +from .exceptions import ( + KeywordExtractionError +) diff --git a/cognee/infrastructure/data/exceptions/exceptions.py b/cognee/infrastructure/data/exceptions/exceptions.py new file mode 100644 index 000000000..5c36d6754 --- /dev/null +++ b/cognee/infrastructure/data/exceptions/exceptions.py @@ -0,0 +1,22 @@ +from cognee.exceptions import ( + CogneeValidationError, +) +from fastapi import status + + +class KeywordExtractionError(CogneeValidationError): + """ + Raised when a provided value is syntactically valid but semantically unacceptable + for the given operation. + + Example: + - Passing an empty string to a keyword extraction function. + """ + + def __init__( + self, + message: str = "Extract_keywords cannot extract keywords from empty text.", + name: str = "KeywordExtractionError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + super().__init__(message, name, status_code) diff --git a/cognee/infrastructure/data/utils/extract_keywords.py b/cognee/infrastructure/data/utils/extract_keywords.py index bd4fedd56..2915131a4 100644 --- a/cognee/infrastructure/data/utils/extract_keywords.py +++ b/cognee/infrastructure/data/utils/extract_keywords.py @@ -1,6 +1,6 @@ from sklearn.feature_extraction.text import TfidfVectorizer -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError from cognee.shared.utils import extract_pos_tags @@ -25,7 +25,7 @@ def extract_keywords(text: str) -> list[str]: with more than 3 characters. 
""" if len(text) == 0: - raise InvalidValueError(message="extract_keywords cannot extract keywords from empty text.") + raise KeywordExtractionError() tags = extract_pos_tags(text) nouns = [word for (word, tag) in tags if tag == "NN"] From 657c775cbe673f10059028f5c3f5c51af239c5bf Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:22:03 +0200 Subject: [PATCH 09/51] feat: adds configattributeerror --- cognee/api/v1/config/config.py | 6 ++---- cognee/api/v1/exceptions/__init__.py | 9 +++++++++ cognee/api/v1/exceptions/exceptions.py | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 cognee/api/v1/exceptions/__init__.py create mode 100644 cognee/api/v1/exceptions/exceptions.py diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py index 9970b7471..03df700cb 100644 --- a/cognee/api/v1/config/config.py +++ b/cognee/api/v1/config/config.py @@ -2,7 +2,6 @@ import os from cognee.base_config import get_base_config -from cognee.exceptions import InvalidValueError, InvalidAttributeError from cognee.modules.cognify.config import get_cognify_config from cognee.infrastructure.data.chunking.config import get_chunk_config from cognee.infrastructure.databases.vector import get_vectordb_config @@ -11,6 +10,7 @@ from cognee.infrastructure.llm.config import ( get_llm_config, ) from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config +from cognee.api.v1.exceptions.exceptions import InvalidConfigAttributeError class config: @@ -92,9 +92,7 @@ class config: if hasattr(llm_config, key): object.__setattr__(llm_config, key, value) else: - raise InvalidAttributeError( - message=f"'{key}' is not a valid attribute of the config." 
- ) + raise InvalidConfigAttributeError(attribute=key) @staticmethod def set_chunk_strategy(chunk_strategy: object): diff --git a/cognee/api/v1/exceptions/__init__.py b/cognee/api/v1/exceptions/__init__.py new file mode 100644 index 000000000..f18b9f99a --- /dev/null +++ b/cognee/api/v1/exceptions/__init__.py @@ -0,0 +1,9 @@ +""" +Custom exceptions for the Cognee API. + +This module defines a set of exceptions for handling various data errors +""" + +from .exceptions import ( + InvalidConfigAttributeError +) diff --git a/cognee/api/v1/exceptions/exceptions.py b/cognee/api/v1/exceptions/exceptions.py new file mode 100644 index 000000000..9875d179b --- /dev/null +++ b/cognee/api/v1/exceptions/exceptions.py @@ -0,0 +1,15 @@ +from cognee.exceptions import ( + CogneeConfigurationError, +) +from fastapi import status + + +class InvalidConfigAttributeError(CogneeConfigurationError): + def __init__( + self, + attribute: str, + name: str = "InvalidConfigAttributeError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + message = f"'{attribute}' is not a valid attribute of the configuration." 
+ super().__init__(message, name, status_code) From 5bc00f1143d18ef000beb3b7f92411b8160913f7 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:29:35 +0200 Subject: [PATCH 10/51] feat: adds new search classes to search.py --- cognee/modules/search/exceptions/__init__.py | 9 +++++++++ cognee/modules/search/exceptions/exceptions.py | 15 +++++++++++++++ cognee/modules/search/methods/search.py | 5 ++--- 3 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 cognee/modules/search/exceptions/__init__.py create mode 100644 cognee/modules/search/exceptions/exceptions.py diff --git a/cognee/modules/search/exceptions/__init__.py b/cognee/modules/search/exceptions/__init__.py new file mode 100644 index 000000000..ffb30f428 --- /dev/null +++ b/cognee/modules/search/exceptions/__init__.py @@ -0,0 +1,9 @@ +""" +Custom exceptions for the Cognee API. + +This module defines a set of exceptions for handling various data errors +""" + +from .exceptions import ( + UnsupportedSearchTypeError +) diff --git a/cognee/modules/search/exceptions/exceptions.py b/cognee/modules/search/exceptions/exceptions.py new file mode 100644 index 000000000..ddc877700 --- /dev/null +++ b/cognee/modules/search/exceptions/exceptions.py @@ -0,0 +1,15 @@ +from cognee.exceptions import ( + CogneeValidationError, +) +from fastapi import status + + +class UnsupportedSearchTypeError(CogneeValidationError): + def __init__( + self, + search_type: str, + name: str = "UnsupportedSearchTypeError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + message = f"Unsupported search type: {search_type}" + super().__init__(message, name, status_code) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 365920019..f431a498e 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -3,9 +3,8 @@ import json import asyncio from uuid import UUID from typing 
import Callable, List, Optional, Type, Union - +from cognee.modules.search.exceptions import UnsupportedSearchTypeError from cognee.context_global_variables import set_database_global_context_variables -from cognee.exceptions import InvalidValueError from cognee.modules.retrieval.chunks_retriever import ChunksRetriever from cognee.modules.retrieval.insights_retriever import InsightsRetriever from cognee.modules.retrieval.summaries_retriever import SummariesRetriever @@ -136,7 +135,7 @@ async def specific_search( search_task = search_tasks.get(query_type) if search_task is None: - raise InvalidValueError(message=f"Unsupported search type: {query_type}") + raise UnsupportedSearchTypeError(str(query_type)) send_telemetry("cognee.search EXECUTION STARTED", user.id) From 9fb9f68c42a9b1179f5bfca59e9e9976fcec323f Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:36:31 +0200 Subject: [PATCH 11/51] adds new base errors to retrieval exceptions --- .../modules/retrieval/exceptions/exceptions.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cognee/modules/retrieval/exceptions/exceptions.py b/cognee/modules/retrieval/exceptions/exceptions.py index a403f3e44..3e934909b 100644 --- a/cognee/modules/retrieval/exceptions/exceptions.py +++ b/cognee/modules/retrieval/exceptions/exceptions.py @@ -1,8 +1,8 @@ from fastapi import status -from cognee.exceptions import CogneeApiError, CriticalError +from cognee.exceptions import CogneeValidationError, CogneeSystemError -class SearchTypeNotSupported(CogneeApiError): +class SearchTypeNotSupported(CogneeValidationError): def __init__( self, message: str = "CYPHER search type not supported by the adapter.", @@ -12,7 +12,7 @@ class SearchTypeNotSupported(CogneeApiError): super().__init__(message, name, status_code) -class CypherSearchError(CogneeApiError): +class CypherSearchError(CogneeSystemError): def __init__( self, message: str = "An error 
occurred during the execution of the Cypher query.", @@ -22,11 +22,17 @@ class CypherSearchError(CogneeApiError): super().__init__(message, name, status_code) -class NoDataError(CriticalError): - message: str = "No data found in the system, please add data first." +class NoDataError(CogneeValidationError): + def __init__( + self, + message: str = "No data found in the system, please add data first.", + name: str = "NoDataError", + status_code: int = status.HTTP_404_NOT_FOUND, + ): + super().__init__(message, name, status_code) -class CollectionDistancesNotFoundError(CogneeApiError): +class CollectionDistancesNotFoundError(CogneeValidationError): def __init__( self, message: str = "No collection distances found for the given query.", From 91b9c11cd01109794c0bedef0f5fe6580435dd96 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 12:39:34 +0200 Subject: [PATCH 12/51] changes base class for vectordb exceptions --- .../databases/vector/exceptions/exceptions.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cognee/infrastructure/databases/vector/exceptions/exceptions.py b/cognee/infrastructure/databases/vector/exceptions/exceptions.py index b6a8237d8..48d9976e0 100644 --- a/cognee/infrastructure/databases/vector/exceptions/exceptions.py +++ b/cognee/infrastructure/databases/vector/exceptions/exceptions.py @@ -1,8 +1,8 @@ from fastapi import status -from cognee.exceptions import CriticalError +from cognee.exceptions import CogneeValidationError -class CollectionNotFoundError(CriticalError): +class CollectionNotFoundError(CogneeValidationError): """ Represents an error that occurs when a requested collection cannot be found. 
@@ -16,7 +16,5 @@ class CollectionNotFoundError(CriticalError): message, name: str = "CollectionNotFoundError", status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY, - log=True, - log_level="DEBUG", ): - super().__init__(message, name, status_code, log, log_level) + super().__init__(message, name, status_code) From 6dcd59c73cee29ae3da396724e37e5f02d0b9636 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 13:07:04 +0200 Subject: [PATCH 13/51] feat: Adds changes to cognee graph part --- .../modules/graph/cognee_graph/CogneeGraph.py | 6 ++--- .../graph/cognee_graph/CogneeGraphElements.py | 16 ++++-------- cognee/modules/graph/exceptions/__init__.py | 2 ++ cognee/modules/graph/exceptions/exceptions.py | 26 ++++++++++++++++--- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/cognee/modules/graph/cognee_graph/CogneeGraph.py b/cognee/modules/graph/cognee_graph/CogneeGraph.py index ca1984dfe..bdafaf238 100644 --- a/cognee/modules/graph/cognee_graph/CogneeGraph.py +++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py @@ -2,8 +2,7 @@ import time from cognee.shared.logging_utils import get_logger from typing import List, Dict, Union, Optional, Type -from cognee.exceptions import InvalidValueError -from cognee.modules.graph.exceptions import EntityNotFoundError, EntityAlreadyExistsError +from cognee.modules.graph.exceptions import EntityNotFoundError, EntityAlreadyExistsError, InvalidDimensionsError from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph @@ -66,8 +65,7 @@ class CogneeGraph(CogneeAbstractGraph): node_name: Optional[List[str]] = None, ) -> None: if node_dimension < 1 or edge_dimension < 1: - raise InvalidValueError(message="Dimensions must be positive integers") - + raise InvalidDimensionsError() 
try:
             import time

diff --git a/cognee/modules/graph/cognee_graph/CogneeGraphElements.py b/cognee/modules/graph/cognee_graph/CogneeGraphElements.py
index 063248ee2..c22cc5c18 100644
--- a/cognee/modules/graph/cognee_graph/CogneeGraphElements.py
+++ b/cognee/modules/graph/cognee_graph/CogneeGraphElements.py
@@ -1,8 +1,6 @@
 import numpy as np
 from typing import List, Dict, Optional, Any, Union
-
-from cognee.exceptions import InvalidValueError
-
+from cognee.modules.graph.exceptions import InvalidDimensionsError, DimensionOutOfRangeError
 
 class Node:
     """
@@ -24,7 +22,7 @@ class Node:
         self, node_id: str, attributes: Optional[Dict[str, Any]] = None, dimension: int = 1
     ):
         if dimension <= 0:
-            raise InvalidValueError(message="Dimension must be a positive integer")
+            raise InvalidDimensionsError()
         self.id = node_id
         self.attributes = attributes if attributes is not None else {}
         self.attributes["vector_distance"] = float("inf")
@@ -58,9 +56,7 @@ class Node:
 
     def is_node_alive_in_dimension(self, dimension: int) -> bool:
         if dimension < 0 or dimension >= len(self.status):
-            raise InvalidValueError(
-                message=f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}."
-            )
+            raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
         return self.status[dimension] == 1
 
     def add_attribute(self, key: str, value: Any) -> None:
@@ -110,7 +106,7 @@ class Edge:
         dimension: int = 1,
     ):
         if dimension <= 0:
-            raise InvalidValueError(message="Dimensions must be a positive integer.")
+            raise InvalidDimensionsError()
         self.node1 = node1
         self.node2 = node2
         self.attributes = attributes if attributes is not None else {}
@@ -120,9 +116,7 @@ class Edge:
 
     def is_edge_alive_in_dimension(self, dimension: int) -> bool:
         if dimension < 0 or dimension >= len(self.status):
-            raise InvalidValueError(
-                message=f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}."
- ) + raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1) return self.status[dimension] == 1 def add_attribute(self, key: str, value: Any) -> None: diff --git a/cognee/modules/graph/exceptions/__init__.py b/cognee/modules/graph/exceptions/__init__.py index 5cf600099..04bec74ad 100644 --- a/cognee/modules/graph/exceptions/__init__.py +++ b/cognee/modules/graph/exceptions/__init__.py @@ -7,4 +7,6 @@ This module defines a set of exceptions for handling various graph errors from .exceptions import ( EntityNotFoundError, EntityAlreadyExistsError, + InvalidDimensionsError, + DimensionOutOfRangeError, ) diff --git a/cognee/modules/graph/exceptions/exceptions.py b/cognee/modules/graph/exceptions/exceptions.py index 854e620ff..6fb2d400d 100644 --- a/cognee/modules/graph/exceptions/exceptions.py +++ b/cognee/modules/graph/exceptions/exceptions.py @@ -1,8 +1,8 @@ -from cognee.exceptions import CogneeApiError +from cognee.exceptions import CogneeValidationError from fastapi import status -class EntityNotFoundError(CogneeApiError): +class EntityNotFoundError(CogneeValidationError): """Database returns nothing""" def __init__( @@ -14,7 +14,7 @@ class EntityNotFoundError(CogneeApiError): super().__init__(message, name, status_code) -class EntityAlreadyExistsError(CogneeApiError): +class EntityAlreadyExistsError(CogneeValidationError): """Conflict detected, like trying to create a resource that already exists""" def __init__( @@ -24,3 +24,23 @@ class EntityAlreadyExistsError(CogneeApiError): status_code=status.HTTP_409_CONFLICT, ): super().__init__(message, name, status_code) + +class InvalidDimensionsError(CogneeValidationError): + def __init__( + self, + name: str = "InvalidDimensionsError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + message = "Dimensions must be positive integers." 
+ super().__init__(message, name, status_code) + +class DimensionOutOfRangeError(CogneeValidationError): + def __init__( + self, + dimension: int, + max_index: int, + name: str = "DimensionOutOfRangeError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + message = f"Dimension {dimension} is out of range. Valid range is 0 to {max_index}." + super().__init__(message, name, status_code) From 32996aa0d08b0c90dd234607bd6436b79e72e5b1 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 13:40:50 +0200 Subject: [PATCH 14/51] feat: adds new error classes to llm and databases + introduces loglevel and logging from child error --- cognee/exceptions/exceptions.py | 16 +++++++++++---- .../exceptions/EmbeddingException.py | 20 ------------------- .../databases/exceptions/__init__.py | 1 + .../databases/exceptions/exceptions.py | 18 +++++++++++++++++ .../embeddings/LiteLLMEmbeddingEngine.py | 2 +- .../databases/vector/exceptions/exceptions.py | 4 +++- .../vector/pgvector/PGVectorAdapter.py | 4 ++-- cognee/infrastructure/llm/exceptions.py | 8 ++++++++ .../litellm_instructor/llm/openai/adapter.py | 5 ++--- cognee/tasks/storage/index_data_points.py | 2 +- 10 files changed, 48 insertions(+), 32 deletions(-) delete mode 100644 cognee/infrastructure/databases/exceptions/EmbeddingException.py diff --git a/cognee/exceptions/exceptions.py b/cognee/exceptions/exceptions.py index e7eb784b8..9b6cef21d 100644 --- a/cognee/exceptions/exceptions.py +++ b/cognee/exceptions/exceptions.py @@ -43,8 +43,10 @@ class CogneeSystemError(CogneeApiError): message: str = "A system error occurred.", name: str = "CogneeSystemError", status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + log=True, + log_level="ERROR", ): - super().__init__(message, name, status_code) + super().__init__(message, name, status_code, log, log_level) class CogneeValidationError(CogneeApiError): @@ -55,8 +57,10 @@ class CogneeValidationError(CogneeApiError): message: 
str = "A validation error occurred.", name: str = "CogneeValidationError", status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + log=True, + log_level="ERROR", ): - super().__init__(message, name, status_code) + super().__init__(message, name, status_code, log, log_level) class CogneeConfigurationError(CogneeApiError): @@ -67,8 +71,10 @@ class CogneeConfigurationError(CogneeApiError): message: str = "A system configuration error occurred.", name: str = "CogneeConfigurationError", status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + log=True, + log_level="ERROR", ): - super().__init__(message, name, status_code) + super().__init__(message, name, status_code, log, log_level) class CogneeTransientError(CogneeApiError): @@ -79,6 +85,8 @@ class CogneeTransientError(CogneeApiError): message: str = "A transient error occurred.", name: str = "CogneeTransientError", status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + log=True, + log_level="ERROR", ): - super().__init__(message, name, status_code) + super().__init__(message, name, status_code, log, log_level) diff --git a/cognee/infrastructure/databases/exceptions/EmbeddingException.py b/cognee/infrastructure/databases/exceptions/EmbeddingException.py deleted file mode 100644 index 62616899c..000000000 --- a/cognee/infrastructure/databases/exceptions/EmbeddingException.py +++ /dev/null @@ -1,20 +0,0 @@ -from cognee.exceptions import CogneeApiError -from fastapi import status - - -class EmbeddingException(CogneeApiError): - """ - Custom exception for handling embedding-related errors. - - This exception class is designed to indicate issues specifically related to embeddings - within the application. It extends the base exception class CogneeApiError and allows - for customization of the error message, name, and status code. 
- """ - - def __init__( - self, - message: str = "Embedding Exception.", - name: str = "EmbeddingException", - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - ): - super().__init__(message, name, status_code) diff --git a/cognee/infrastructure/databases/exceptions/__init__.py b/cognee/infrastructure/databases/exceptions/__init__.py index 9d8d18567..c7d2a8feb 100644 --- a/cognee/infrastructure/databases/exceptions/__init__.py +++ b/cognee/infrastructure/databases/exceptions/__init__.py @@ -8,4 +8,5 @@ from .exceptions import ( EntityNotFoundError, EntityAlreadyExistsError, DatabaseNotCreatedError, + EmbeddingException, ) diff --git a/cognee/infrastructure/databases/exceptions/exceptions.py b/cognee/infrastructure/databases/exceptions/exceptions.py index 4aba09d37..66740fa5e 100644 --- a/cognee/infrastructure/databases/exceptions/exceptions.py +++ b/cognee/infrastructure/databases/exceptions/exceptions.py @@ -84,3 +84,21 @@ class NodesetFilterNotSupportedError(CogneeConfigurationError): self.message = message self.name = name self.status_code = status_code + + +class EmbeddingException(CogneeConfigurationError): + """ + Custom exception for handling embedding-related errors. + + This exception class is designed to indicate issues specifically related to embeddings + within the application. It extends the base exception class CogneeConfigurationError allows + for customization of the error message, name, and status code. 
+ """ + + def __init__( + self, + message: str = "Embedding Exception.", + name: str = "EmbeddingException", + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + ): + super().__init__(message, name, status_code) diff --git a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py index e3cdaea00..dae664907 100644 --- a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py @@ -6,7 +6,7 @@ import math import litellm import os from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine -from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException +from cognee.infrastructure.databases.exceptions import EmbeddingException from cognee.infrastructure.llm.tokenizer.Gemini import ( GeminiTokenizer, ) diff --git a/cognee/infrastructure/databases/vector/exceptions/exceptions.py b/cognee/infrastructure/databases/vector/exceptions/exceptions.py index 48d9976e0..ee0712433 100644 --- a/cognee/infrastructure/databases/vector/exceptions/exceptions.py +++ b/cognee/infrastructure/databases/vector/exceptions/exceptions.py @@ -16,5 +16,7 @@ class CollectionNotFoundError(CogneeValidationError): message, name: str = "CollectionNotFoundError", status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY, + log=True, + log_level="DEBUG", ): - super().__init__(message, name, status_code) + super().__init__(message, name, status_code, log, log_level) diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index ac40b91b8..96b2056c4 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -9,7 +9,7 @@ from sqlalchemy.exc import ProgrammingError 
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError -from cognee.exceptions import InvalidValueError + from cognee.shared.logging_utils import get_logger from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine.utils import parse_id @@ -275,7 +275,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): return metadata.tables[collection_name] else: raise CollectionNotFoundError( - f"Collection '{collection_name}' not found!", log_level="DEBUG" + f"Collection '{collection_name}' not found!", ) async def retrieve(self, collection_name: str, data_point_ids: List[str]): diff --git a/cognee/infrastructure/llm/exceptions.py b/cognee/infrastructure/llm/exceptions.py index 287820448..c8f5726fb 100644 --- a/cognee/infrastructure/llm/exceptions.py +++ b/cognee/infrastructure/llm/exceptions.py @@ -20,3 +20,11 @@ class UnsupportedLLMProviderError(CogneeValidationError): def __init__(self, provider: str): message = f"Unsupported LLM provider: {provider}" super().__init__(message=message, name="UnsupportedLLMProviderError") + +class MissingSystemPromptPathError(CogneeValidationError): + def __init__( + self, + name: str = "MissingSystemPromptPathError", + ): + message = "No system prompt path provided." 
+ super().__init__(message, name) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 69c1bac1c..38e1bc82e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -7,12 +7,11 @@ from openai import ContentFilterFinishReasonError from litellm.exceptions import ContentPolicyViolationError from instructor.exceptions import InstructorRetryException -from cognee.exceptions import InvalidValueError from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) -from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError +from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError, MissingSystemPromptPathError from cognee.infrastructure.files.utils.open_data_file import open_data_file from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( rate_limit_async, @@ -325,7 +324,7 @@ class OpenAIAdapter(LLMInterface): if not text_input: text_input = "No user input provided." 
if not system_prompt: - raise InvalidValueError(message="No system prompt path provided.") + raise MissingSystemPromptPathError() system_prompt = LLMGateway.read_query_prompt(system_prompt) formatted_prompt = ( diff --git a/cognee/tasks/storage/index_data_points.py b/cognee/tasks/storage/index_data_points.py index 51b6c2d6e..9c363c04c 100644 --- a/cognee/tasks/storage/index_data_points.py +++ b/cognee/tasks/storage/index_data_points.py @@ -1,6 +1,6 @@ from cognee.shared.logging_utils import get_logger -from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException +from cognee.infrastructure.databases.exceptions import EmbeddingException from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.engine import DataPoint From bed523a36b18b8b6ac76de69f344973baf5cd043 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 13:59:12 +0200 Subject: [PATCH 15/51] feat: api error handling restruct --- cognee/api/v1/config/config.py | 12 ++----- cognee/api/v1/delete/delete.py | 2 +- cognee/api/v1/delete/exceptions.py | 50 -------------------------- cognee/api/v1/exceptions/__init__.py | 6 +++- cognee/api/v1/exceptions/exceptions.py | 37 +++++++++++++++++++ 5 files changed, 46 insertions(+), 61 deletions(-) delete mode 100644 cognee/api/v1/delete/exceptions.py diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py index 03df700cb..464753438 100644 --- a/cognee/api/v1/config/config.py +++ b/cognee/api/v1/config/config.py @@ -129,9 +129,7 @@ class config: if hasattr(relational_db_config, key): object.__setattr__(relational_db_config, key, value) else: - raise InvalidAttributeError( - message=f"'{key}' is not a valid attribute of the config." 
- )
+                raise InvalidConfigAttributeError(attribute=key)
 
     @staticmethod
     def set_migration_db_config(config_dict: dict):
@@ -143,9 +141,7 @@ class config:
             if hasattr(migration_db_config, key):
                 object.__setattr__(migration_db_config, key, value)
             else:
-                raise InvalidAttributeError(
-                    message=f"'{key}' is not a valid attribute of the config."
-                )
+                raise InvalidConfigAttributeError(attribute=key)
 
     @staticmethod
     def set_graph_db_config(config_dict: dict) -> None:
@@ -169,9 +165,7 @@ class config:
             if hasattr(vector_db_config, key):
                 object.__setattr__(vector_db_config, key, value)
             else:
-                raise InvalidAttributeError(
-                    message=f"'{key}' is not a valid attribute of the config."
-                )
+                raise InvalidConfigAttributeError(attribute=key)
 
     @staticmethod
     def set_vector_db_key(db_key: str):
diff --git a/cognee/api/v1/delete/delete.py b/cognee/api/v1/delete/delete.py
index 98f6cb9fc..73f264670 100644
--- a/cognee/api/v1/delete/delete.py
+++ b/cognee/api/v1/delete/delete.py
@@ -16,7 +16,7 @@ from cognee.modules.users.methods import get_default_user
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.context_global_variables import set_database_global_context_variables
 
-from cognee.api.v1.delete.exceptions import (
+from cognee.api.v1.exceptions import (
     DocumentNotFoundError,
     DatasetNotFoundError,
     DocumentSubgraphNotFoundError,
diff --git a/cognee/api/v1/delete/exceptions.py b/cognee/api/v1/delete/exceptions.py
deleted file mode 100644
index a4d2a77ed..000000000
--- a/cognee/api/v1/delete/exceptions.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from cognee.exceptions import CogneeApiError
-from fastapi import status
-
-
-class DocumentNotFoundError(CogneeApiError):
-    """Raised when a document cannot be found in the database."""
-
-    def __init__(
-        self,
-        message: str = "Document not found in database.",
-        name: str = "DocumentNotFoundError",
-        status_code: int = status.HTTP_404_NOT_FOUND,
-    ):
-        super().__init__(message, name, status_code)
-
-
-class 
DatasetNotFoundError(CogneeApiError): - """Raised when a dataset cannot be found.""" - - def __init__( - self, - message: str = "Dataset not found.", - name: str = "DatasetNotFoundError", - status_code: int = status.HTTP_404_NOT_FOUND, - ): - super().__init__(message, name, status_code) - - -class DataNotFoundError(CogneeApiError): - """Raised when a dataset cannot be found.""" - - def __init__( - self, - message: str = "Data not found.", - name: str = "DataNotFoundError", - status_code: int = status.HTTP_404_NOT_FOUND, - ): - super().__init__(message, name, status_code) - - -class DocumentSubgraphNotFoundError(CogneeApiError): - """Raised when a document's subgraph cannot be found in the graph database.""" - - def __init__( - self, - message: str = "Document subgraph not found in graph database.", - name: str = "DocumentSubgraphNotFoundError", - status_code: int = status.HTTP_404_NOT_FOUND, - ): - super().__init__(message, name, status_code) diff --git a/cognee/api/v1/exceptions/__init__.py b/cognee/api/v1/exceptions/__init__.py index f18b9f99a..b2948934c 100644 --- a/cognee/api/v1/exceptions/__init__.py +++ b/cognee/api/v1/exceptions/__init__.py @@ -5,5 +5,9 @@ This module defines a set of exceptions for handling various data errors """ from .exceptions import ( - InvalidConfigAttributeError + InvalidConfigAttributeError, + DocumentNotFoundError, + DatasetNotFoundError, + DataNotFoundError, + DocumentSubgraphNotFoundError ) diff --git a/cognee/api/v1/exceptions/exceptions.py b/cognee/api/v1/exceptions/exceptions.py index 9875d179b..8b65691f1 100644 --- a/cognee/api/v1/exceptions/exceptions.py +++ b/cognee/api/v1/exceptions/exceptions.py @@ -1,5 +1,6 @@ from cognee.exceptions import ( CogneeConfigurationError, + CogneeValidationError ) from fastapi import status @@ -13,3 +14,39 @@ class InvalidConfigAttributeError(CogneeConfigurationError): ): message = f"'{attribute}' is not a valid attribute of the configuration." 
super().__init__(message, name, status_code) + +class DocumentNotFoundError(CogneeValidationError): + def __init__( + self, + message: str = "Document not found in database.", + name: str = "DocumentNotFoundError", + status_code: int = status.HTTP_404_NOT_FOUND, + ): + super().__init__(message, name, status_code) + +class DatasetNotFoundError(CogneeValidationError): + def __init__( + self, + message: str = "Dataset not found.", + name: str = "DatasetNotFoundError", + status_code: int = status.HTTP_404_NOT_FOUND, + ): + super().__init__(message, name, status_code) + +class DataNotFoundError(CogneeValidationError): + def __init__( + self, + message: str = "Data not found.", + name: str = "DataNotFoundError", + status_code: int = status.HTTP_404_NOT_FOUND, + ): + super().__init__(message, name, status_code) + +class DocumentSubgraphNotFoundError(CogneeValidationError): + def __init__( + self, + message: str = "Document subgraph not found in graph database.", + name: str = "DocumentSubgraphNotFoundError", + status_code: int = status.HTTP_404_NOT_FOUND, + ): + super().__init__(message, name, status_code) From 544e08930b3ab43f5a936024e9d41542e60ec297 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:42:57 +0200 Subject: [PATCH 16/51] feat: removing invalidValueErrors --- cognee-mcp/src/server.py | 16 ----------- cognee/api/v1/add/add.py | 5 ---- cognee/api/v1/cognify/cognify.py | 8 ------ cognee/api/v1/search/search.py | 7 ----- .../data/utils/extract_keywords.py | 2 +- .../databases/exceptions/__init__.py | 2 ++ .../databases/exceptions/exceptions.py | 28 +++++++++++++++++++ .../hybrid/falkordb/FalkorDBAdapter.py | 4 +-- .../NeptuneAnalyticsAdapter.py | 9 +++--- .../vector/chromadb/ChromaDBAdapter.py | 4 +-- .../vector/lancedb/LanceDBAdapter.py | 4 +-- .../vector/pgvector/PGVectorAdapter.py | 3 +- .../databases/vector/qdrant/QDrantAdapter.py | 6 ++-- .../vector/weaviate_db/WeaviateAdapter.py | 6 ++-- 
.../llm/anthropic/adapter.py | 4 +-- .../litellm_instructor/llm/gemini/adapter.py | 6 ++-- .../litellm_instructor/llm/get_llm_client.py | 4 +-- .../litellm_instructor/llm/openai/adapter.py | 2 +- .../graph/cognee_graph_elements_test.py | 11 ++++---- 19 files changed, 62 insertions(+), 69 deletions(-) diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index a657225f5..3e65a5eb7 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -221,14 +221,6 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str - The actual cognify process may take significant time depending on text length - Use the cognify_status tool to check the progress of the operation - Raises - ------ - InvalidValueError - If LLM_API_KEY is not set - ValueError - If chunks exceed max token limits (reduce chunk_size) - DatabaseNotCreatedError - If databases are not properly initialized """ async def cognify_task( @@ -512,14 +504,6 @@ async def search(search_query: str, search_type: str) -> list: - Different search types produce different output formats - The function handles the conversion between Cognee's internal result format and MCP's output format - Raises - ------ - InvalidValueError - If LLM_API_KEY is not set (for LLM-based search types) - ValueError - If query_text is empty or search parameters are invalid - NoDataError - If no relevant data found for the search query """ async def search_task(search_query: str, search_type: str) -> str: diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 3e4aaae49..a9ad42923 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -133,11 +133,6 @@ async def add( - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "qdrant", "weaviate" - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx" - Raises: - FileNotFoundError: If specified file paths don't exist - PermissionError: If user lacks access to files or dataset - UnsupportedFileTypeError: If file format 
cannot be processed - InvalidValueError: If LLM_API_KEY is not set or invalid """ tasks = [ Task(resolve_data_directories, include_subdirectories=True), diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index c6508f3a7..23984b9a6 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -177,14 +177,6 @@ async def cognify( - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False) - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) - - Raises: - DatasetNotFoundError: If specified datasets don't exist - PermissionError: If user lacks processing rights - InvalidValueError: If LLM_API_KEY is not set - OntologyParsingError: If ontology file is malformed - ValueError: If chunks exceed max token limits (reduce chunk_size) - DatabaseNotCreatedError: If databases are not properly initialized """ tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 66ce48cc2..f4f4831c1 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -158,13 +158,6 @@ async def search( - VECTOR_DB_PROVIDER: Must match what was used during cognify - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify - Raises: - DatasetNotFoundError: If specified datasets don't exist or aren't accessible - PermissionDeniedError: If user lacks read access to requested datasets - NoDataError: If no relevant data found for the search query - InvalidValueError: If LLM_API_KEY is not set (for LLM-based search types) - ValueError: If query_text is empty or search parameters are invalid - CollectionNotFoundError: If vector collection not found (data not processed) """ # We use lists from now on for datasets if isinstance(datasets, UUID) or isinstance(datasets, str): diff --git 
a/cognee/infrastructure/data/utils/extract_keywords.py b/cognee/infrastructure/data/utils/extract_keywords.py
index 2915131a4..8085459c9 100644
--- a/cognee/infrastructure/data/utils/extract_keywords.py
+++ b/cognee/infrastructure/data/utils/extract_keywords.py
@@ -8,7 +8,7 @@ def extract_keywords(text: str) -> list[str]:
     """
     Extract keywords from the provided text string.
 
-    This function raises an InvalidValueError if the input text is empty. It processes the
+    This function raises a KeyWordExtractionError if the input text is empty. It processes the
     text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
     relevant keywords based on their frequency. The function returns a list of up to 15
     keywords, each having more than 3 characters.
diff --git a/cognee/infrastructure/databases/exceptions/__init__.py b/cognee/infrastructure/databases/exceptions/__init__.py
index c7d2a8feb..56deaac74 100644
--- a/cognee/infrastructure/databases/exceptions/__init__.py
+++ b/cognee/infrastructure/databases/exceptions/__init__.py
@@ -9,4 +9,6 @@ from .exceptions import (
     EntityAlreadyExistsError,
     DatabaseNotCreatedError,
     EmbeddingException,
+    MissingQueryParameterError,
+    MutuallyExclusiveQueryParametersError
 )
diff --git a/cognee/infrastructure/databases/exceptions/exceptions.py b/cognee/infrastructure/databases/exceptions/exceptions.py
index 66740fa5e..6e1bb74f6 100644
--- a/cognee/infrastructure/databases/exceptions/exceptions.py
+++ b/cognee/infrastructure/databases/exceptions/exceptions.py
@@ -102,3 +102,31 @@ class EmbeddingException(CogneeConfigurationError):
         status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
     ):
         super().__init__(message, name, status_code)
+
+class MissingQueryParameterError(CogneeValidationError):
+    """
+    Raised when neither 'query_text' nor 'query_vector' is provided,
+    and at least one is required to perform the operation. 
+ """ + def __init__( + self, + name: str = "MissingQueryParameterError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + message = "One of query_text or query_vector must be provided!" + super().__init__(message, name, status_code) + +class MutuallyExclusiveQueryParametersError(CogneeValidationError): + """ + Raised when both 'text' and 'embedding' are provided to the search function, + but only one type of input is allowed at a time. + """ + def __init__( + self, + name: str = "MutuallyExclusiveQueryParametersError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + message = ( + "The search function accepts either text or embedding as input, but not both." + ) + super().__init__(message, name, status_code) diff --git a/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py b/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py index 35ce7c77e..cb6899925 100644 --- a/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +++ b/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py @@ -9,7 +9,7 @@ from typing import List, Dict, Any, Optional, Tuple, Type, Union from falkordb import FalkorDB -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.databases.graph.graph_db_interface import ( GraphDBInterface, record_graph_changes, @@ -721,7 +721,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface): Returns the search results as a result set from the graph database. 
""" if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_text and not query_vector: query_vector = (await self.embed_data([query_text]))[0] diff --git a/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py b/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py index a04e6f09e..4baf8ff13 100644 --- a/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +++ b/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py @@ -5,7 +5,8 @@ import json from typing import List, Optional, Any, Dict, Type, Tuple from uuid import UUID -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError +from cognee.infrastructure.databases.exceptions import MutuallyExclusiveQueryParametersError from cognee.infrastructure.databases.graph.neptune_driver.adapter import NeptuneGraphDB from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface from cognee.infrastructure.engine import DataPoint @@ -274,11 +275,9 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface): limit = self._TOPK_UPPER_BOUND if query_vector and query_text: - raise InvalidValueError( - message="The search function accepts either text or embedding as input, but not both." 
- ) + raise MutuallyExclusiveQueryParametersError() elif query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() elif query_vector: embedding = query_vector else: diff --git a/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py b/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py index d4b858348..81f47dddd 100644 --- a/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +++ b/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py @@ -3,13 +3,13 @@ from uuid import UUID from typing import List, Optional from chromadb import AsyncHttpClient, Settings -from cognee.exceptions import InvalidValueError from cognee.shared.logging_utils import get_logger from cognee.modules.storage.utils import get_own_properties from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from ..embeddings.EmbeddingEngine import EmbeddingEngine from ..vector_db_interface import VectorDBInterface @@ -373,7 +373,7 @@ class ChromaDBAdapter(VectorDBInterface): Returns a list of ScoredResult instances representing the search results. 
""" if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_text and not query_vector: query_vector = (await self.embedding_engine.embed_text([query_text]))[0] diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py index f37c83113..0184ec3ee 100644 --- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py @@ -5,7 +5,7 @@ from pydantic import BaseModel from lancedb.pydantic import LanceModel, Vector from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.files.storage import get_file_storage @@ -228,7 +228,7 @@ class LanceDBAdapter(VectorDBInterface): normalized: bool = True, ): if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_text and not query_vector: query_vector = (await self.embedding_engine.embed_text([query_text]))[0] diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index 96b2056c4..4dfd9792f 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -17,6 +17,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine from distributed.utils import override_distributed from distributed.tasks.queued_add_data_points 
import queued_add_data_points +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from ...relational.ModelBase import Base from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter @@ -302,7 +303,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): with_vector: bool = False, ) -> List[ScoredResult]: if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_text and not query_vector: query_vector = (await self.embedding_engine.embed_text([query_text]))[0] diff --git a/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py b/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py index 716fc969f..aa1368716 100644 --- a/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +++ b/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py @@ -4,7 +4,7 @@ from qdrant_client import AsyncQdrantClient, models from cognee.shared.logging_utils import get_logger from cognee.infrastructure.engine.utils import parse_id -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult @@ -363,7 +363,7 @@ class QDrantAdapter(VectorDBInterface): Search for data points in a collection based on either a textual query or a vector query. - Raises InvalidValueError if both query_text and query_vector are None. + Raises MissingQueryParameterError if both query_text and query_vector are None. Returns a list of scored results that match the search criteria. 
@@ -388,7 +388,7 @@ class QDrantAdapter(VectorDBInterface): from qdrant_client.http.exceptions import UnexpectedResponse if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if not await self.has_collection(collection_name): return [] diff --git a/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py b/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py index 00a6a0411..db68e004e 100644 --- a/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +++ b/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py @@ -3,7 +3,7 @@ from typing import List, Optional from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential from cognee.shared.logging_utils import get_logger -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError @@ -390,7 +390,7 @@ class WeaviateAdapter(VectorDBInterface): """ Perform a search on a collection using either a text query or a vector query. - Return scored results based on the search criteria provided. Raise InvalidValueError if + Return scored results based on the search criteria provided. Raise MissingQueryParameterError if no query is provided. 
Parameters: @@ -413,7 +413,7 @@ class WeaviateAdapter(VectorDBInterface): import weaviate.exceptions if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_vector is None: query_vector = (await self.embed_data([query_text]))[0] diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py index 636e6c0f2..6845fb6aa 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py @@ -2,7 +2,7 @@ from typing import Type from pydantic import BaseModel import instructor -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) @@ -89,7 +89,7 @@ class AnthropicAdapter(LLMInterface): if not text_input: text_input = "No user input provided." 
if not system_prompt: - raise InvalidValueError(message="No system prompt path provided.") + raise MissingSystemPromptPathError() system_prompt = LLMGateway.read_query_prompt(system_prompt) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py index 61d42ff5f..3cde1fdc4 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py @@ -5,7 +5,7 @@ from litellm import acompletion, JSONSchemaValidationError from cognee.shared.logging_utils import get_logger from cognee.modules.observability.get_observe import get_observe -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) @@ -118,7 +118,7 @@ class GeminiAdapter(LLMInterface): """ Format and display the prompt for a user query. - Raises an InvalidValueError if no system prompt is provided. + Raises a MissingSystemPromptPathError if no system prompt is provided. Parameters: ----------- @@ -135,7 +135,7 @@ class GeminiAdapter(LLMInterface): if not text_input: text_input = "No user input provided." 
if not system_prompt: - raise InvalidValueError(message="No system prompt path provided.") + raise MissingSystemPromptPathError() system_prompt = LLMGateway.read_query_prompt(system_prompt) formatted_prompt = ( diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index 3006a795b..fd347aef3 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -38,7 +38,7 @@ def get_llm_client(): This function retrieves the configuration for the LLM provider and model, and initializes the appropriate LLM client adapter accordingly. It raises an - InvalidValueError if the LLM API key is not set for certain providers or if the provider + LLMAPIKeyNotSetError if the LLM API key is not set for certain providers or if the provider is unsupported. Returns: @@ -62,7 +62,7 @@ def get_llm_client(): if provider == LLMProvider.OPENAI: if llm_config.llm_api_key is None: - raise InvalidValueError(message="LLM API key is not set.") + raise LLMAPIKeyNotSetError() from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import ( OpenAIAdapter, diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 38e1bc82e..4126b3e13 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -307,7 +307,7 @@ class OpenAIAdapter(LLMInterface): Format and display the prompt for a user query. 
This method formats the prompt using the provided user input and system prompt, - returning a string representation. Raises InvalidValueError if the system prompt is not + returning a string representation. Raises MissingSystemPromptPathError if the system prompt is not provided. Parameters: diff --git a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py index 000856b12..a852bcee3 100644 --- a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +++ b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py @@ -1,9 +1,8 @@ import numpy as np import pytest -from cognee.exceptions import InvalidValueError from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node - +from cognee.modules.graph.exceptions import InvalidDimensionsError, DimensionOutOfRangeError def test_node_initialization(): """Test that a Node is initialized correctly.""" @@ -16,7 +15,7 @@ def test_node_initialization(): def test_node_invalid_dimension(): """Test that initializing a Node with a non-positive dimension raises an error.""" - with pytest.raises(InvalidValueError, match="Dimension must be a positive integer"): + with pytest.raises(InvalidDimensionsError, match="Dimensions must be a positive integers"): Node("node1", dimension=0) @@ -69,7 +68,7 @@ def test_is_node_alive_in_dimension(): def test_node_alive_invalid_dimension(): """Test that checking alive status with an invalid dimension raises an error.""" node = Node("node1", dimension=1) - with pytest.raises(InvalidValueError, match="Dimension 1 is out of range"): + with pytest.raises(DimensionOutOfRangeError, match="Dimension 1 is out of range"): node.is_node_alive_in_dimension(1) @@ -106,7 +105,7 @@ def test_edge_invalid_dimension(): """Test that initializing an Edge with a non-positive dimension raises an error.""" node1 = Node("node1") node2 = Node("node2") - with pytest.raises(InvalidValueError, match="Dimensions must be a 
positive integer."): + with pytest.raises(DimensionOutOfRangeError, match="Dimensions must be a positive integer."): Edge(node1, node2, dimension=0) @@ -125,7 +124,7 @@ def test_edge_alive_invalid_dimension(): node1 = Node("node1") node2 = Node("node2") edge = Edge(node1, node2, dimension=1) - with pytest.raises(InvalidValueError, match="Dimension 1 is out of range"): + with pytest.raises(DimensionOutOfRangeError, match="Dimension 1 is out of range"): edge.is_edge_alive_in_dimension(1) From 885f7c3f9915adada0d2ef62e679d60e4ac335e7 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:58:56 +0200 Subject: [PATCH 17/51] chore: fixing graph elements tests --- cognee/modules/graph/cognee_graph/CogneeGraphElements.py | 2 +- .../unit/modules/graph/cognee_graph_elements_test.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cognee/modules/graph/cognee_graph/CogneeGraphElements.py b/cognee/modules/graph/cognee_graph/CogneeGraphElements.py index c22cc5c18..2d8917d4a 100644 --- a/cognee/modules/graph/cognee_graph/CogneeGraphElements.py +++ b/cognee/modules/graph/cognee_graph/CogneeGraphElements.py @@ -106,7 +106,7 @@ class Edge: dimension: int = 1, ): if dimension <= 0: - InvalidDimensionsError() + raise InvalidDimensionsError() self.node1 = node1 self.node2 = node2 self.attributes = attributes if attributes is not None else {} diff --git a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py index a852bcee3..a8fb17268 100644 --- a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +++ b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py @@ -15,7 +15,7 @@ def test_node_initialization(): def test_node_invalid_dimension(): """Test that initializing a Node with a non-positive dimension raises an error.""" - with pytest.raises(InvalidDimensionsError, match="Dimensions must be a positive integers"): + 
with pytest.raises(InvalidDimensionsError): Node("node1", dimension=0) @@ -68,7 +68,7 @@ def test_is_node_alive_in_dimension(): def test_node_alive_invalid_dimension(): """Test that checking alive status with an invalid dimension raises an error.""" node = Node("node1", dimension=1) - with pytest.raises(DimensionOutOfRangeError, match="Dimension 1 is out of range"): + with pytest.raises(DimensionOutOfRangeError): node.is_node_alive_in_dimension(1) @@ -105,7 +105,7 @@ def test_edge_invalid_dimension(): """Test that initializing an Edge with a non-positive dimension raises an error.""" node1 = Node("node1") node2 = Node("node2") - with pytest.raises(DimensionOutOfRangeError, match="Dimensions must be a positive integer."): + with pytest.raises(InvalidDimensionsError): Edge(node1, node2, dimension=0) @@ -124,7 +124,7 @@ def test_edge_alive_invalid_dimension(): node1 = Node("node1") node2 = Node("node2") edge = Edge(node1, node2, dimension=1) - with pytest.raises(DimensionOutOfRangeError, match="Dimension 1 is out of range"): + with pytest.raises(DimensionOutOfRangeError): edge.is_edge_alive_in_dimension(1) From d1bfeaa0f27a8f53d2b58819dd88d155e36a9a9d Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:00:25 +0200 Subject: [PATCH 18/51] fix: fixes search unit test error expectation --- cognee/tests/unit/modules/search/search_methods_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/tests/unit/modules/search/search_methods_test.py b/cognee/tests/unit/modules/search/search_methods_test.py index 14712f6d2..8e9afff1c 100644 --- a/cognee/tests/unit/modules/search/search_methods_test.py +++ b/cognee/tests/unit/modules/search/search_methods_test.py @@ -5,7 +5,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest from pylint.checkers.utils import node_type -from cognee.exceptions import InvalidValueError +from cognee.modules.search.exceptions import 
UnsupportedSearchTypeError from cognee.modules.search.methods.search import search, specific_search from cognee.modules.search.types import SearchType from cognee.modules.users.models import User @@ -217,7 +217,7 @@ async def test_specific_search_invalid_type(mock_user): query_type = "INVALID_TYPE" # Not a valid SearchType # Execute and verify - with pytest.raises(InvalidValueError) as excinfo: + with pytest.raises(UnsupportedSearchTypeError) as excinfo: await specific_search(query_type, query, mock_user) assert "Unsupported search type" in str(excinfo.value) From fabbd638a4ef00391f313f8b57df85ea25381f30 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:03:12 +0200 Subject: [PATCH 19/51] chore: renaming error --- cognee/modules/data/exceptions/exceptions.py | 4 ++-- cognee/modules/data/methods/delete_data.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cognee/modules/data/exceptions/exceptions.py b/cognee/modules/data/exceptions/exceptions.py index c85e11cb2..ac3b68e64 100644 --- a/cognee/modules/data/exceptions/exceptions.py +++ b/cognee/modules/data/exceptions/exceptions.py @@ -45,11 +45,11 @@ class DatasetTypeError(CogneeValidationError): super().__init__(message, name, status_code) -class InvalidAttributeError(CogneeValidationError): +class InvalidTableAttributeError(CogneeValidationError): def __init__( self, message: str = "The provided data object is missing the required '__tablename__' attribute.", - name: str = "InvalidAttributeError", + name: str = "InvalidTableAttributeError", status_code: int = status.HTTP_400_BAD_REQUEST, ): super().__init__(message, name, status_code) diff --git a/cognee/modules/data/methods/delete_data.py b/cognee/modules/data/methods/delete_data.py index 6ec055fcc..5425acac3 100644 --- a/cognee/modules/data/methods/delete_data.py +++ b/cognee/modules/data/methods/delete_data.py @@ -1,4 +1,4 @@ -from cognee.modules.data.exceptions.exceptions import 
InvalidAttributeError +from cognee.modules.data.exceptions.exceptions import InvalidTableAttributeError from cognee.modules.data.models import Data from cognee.infrastructure.databases.relational import get_relational_engine @@ -13,7 +13,7 @@ async def delete_data(data: Data): ValueError: If the data object is invalid. """ if not hasattr(data, "__tablename__"): - raise InvalidAttributeError() + raise InvalidTableAttributeError() db_engine = get_relational_engine() From 59c9204a74c8c357e784335223f6136c133e1599 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:03:44 +0200 Subject: [PATCH 20/51] Update exceptions.py --- cognee/infrastructure/databases/vector/exceptions/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/vector/exceptions/exceptions.py b/cognee/infrastructure/databases/vector/exceptions/exceptions.py index ee0712433..ecd106c0d 100644 --- a/cognee/infrastructure/databases/vector/exceptions/exceptions.py +++ b/cognee/infrastructure/databases/vector/exceptions/exceptions.py @@ -6,7 +6,7 @@ class CollectionNotFoundError(CogneeValidationError): """ Represents an error that occurs when a requested collection cannot be found. - This class extends the CriticalError to handle specific cases where a requested + This class extends the CogneeValidationError to handle specific cases where a requested collection is unavailable. It can be initialized with a custom message and allows for logging options including log level and whether to log the error. 
""" From 68327d3ab9fe0b99aa6aef7fc4fe3bd78554ef1c Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:14:43 +0200 Subject: [PATCH 21/51] chore: Changing base classes for exceptions that were already defined --- .../databases/graph/neptune_driver/exceptions.py | 4 ++-- cognee/modules/ingestion/exceptions/exceptions.py | 4 ++-- cognee/modules/ontology/exceptions/exceptions.py | 8 ++++---- cognee/modules/pipelines/exceptions/exceptions.py | 4 ++-- cognee/modules/users/exceptions/exceptions.py | 12 ++++++------ cognee/shared/exceptions/exceptions.py | 4 ++-- cognee/tasks/completion/exceptions/exceptions.py | 4 ++-- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cognee/infrastructure/databases/graph/neptune_driver/exceptions.py b/cognee/infrastructure/databases/graph/neptune_driver/exceptions.py index 57d54d74d..d78936202 100644 --- a/cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +++ b/cognee/infrastructure/databases/graph/neptune_driver/exceptions.py @@ -3,11 +3,11 @@ This module defines custom exceptions for Neptune Analytics operations. 
""" -from cognee.exceptions import CogneeApiError +from cognee.exceptions import CogneeSystemError from fastapi import status -class NeptuneAnalyticsError(CogneeApiError): +class NeptuneAnalyticsError(CogneeSystemError): """Base exception for Neptune Analytics operations.""" def __init__( diff --git a/cognee/modules/ingestion/exceptions/exceptions.py b/cognee/modules/ingestion/exceptions/exceptions.py index 08991a946..d43b69d60 100644 --- a/cognee/modules/ingestion/exceptions/exceptions.py +++ b/cognee/modules/ingestion/exceptions/exceptions.py @@ -1,8 +1,8 @@ -from cognee.exceptions import CogneeApiError +from cognee.exceptions import CogneeValidationError from fastapi import status -class IngestionError(CogneeApiError): +class IngestionError(CogneeValidationError): def __init__( self, message: str = "Type of data sent to classify not supported.", diff --git a/cognee/modules/ontology/exceptions/exceptions.py b/cognee/modules/ontology/exceptions/exceptions.py index 511e41524..daa8dcdb5 100644 --- a/cognee/modules/ontology/exceptions/exceptions.py +++ b/cognee/modules/ontology/exceptions/exceptions.py @@ -1,8 +1,8 @@ -from cognee.exceptions import CogneeApiError +from cognee.exceptions import CogneeSystemError from fastapi import status -class OntologyInitializationError(CogneeApiError): +class OntologyInitializationError(CogneeSystemError): def __init__( self, message: str = "Ontology initialization failed", @@ -12,7 +12,7 @@ class OntologyInitializationError(CogneeApiError): super().__init__(message, name, status_code) -class FindClosestMatchError(CogneeApiError): +class FindClosestMatchError(CogneeSystemError): def __init__( self, message: str = "Error in find_closest_match", @@ -22,7 +22,7 @@ class FindClosestMatchError(CogneeApiError): super().__init__(message, name, status_code) -class GetSubgraphError(CogneeApiError): +class GetSubgraphError(CogneeSystemError): def __init__( self, message: str = "Failed to retrieve subgraph", diff --git 
a/cognee/modules/pipelines/exceptions/exceptions.py b/cognee/modules/pipelines/exceptions/exceptions.py index 0a4863075..646a51286 100644 --- a/cognee/modules/pipelines/exceptions/exceptions.py +++ b/cognee/modules/pipelines/exceptions/exceptions.py @@ -1,8 +1,8 @@ -from cognee.exceptions import CogneeApiError +from cognee.exceptions import CogneeSystemError from fastapi import status -class PipelineRunFailedError(CogneeApiError): +class PipelineRunFailedError(CogneeSystemError): def __init__( self, message: str = "Pipeline run failed.", diff --git a/cognee/modules/users/exceptions/exceptions.py b/cognee/modules/users/exceptions/exceptions.py index a7484c05e..85ca8ab63 100644 --- a/cognee/modules/users/exceptions/exceptions.py +++ b/cognee/modules/users/exceptions/exceptions.py @@ -1,8 +1,8 @@ -from cognee.exceptions import CogneeApiError +from cognee.exceptions import CogneeValidationError from fastapi import status -class RoleNotFoundError(CogneeApiError): +class RoleNotFoundError(CogneeValidationError): """User group not found""" def __init__( @@ -14,7 +14,7 @@ class RoleNotFoundError(CogneeApiError): super().__init__(message, name, status_code) -class TenantNotFoundError(CogneeApiError): +class TenantNotFoundError(CogneeValidationError): """User group not found""" def __init__( @@ -26,7 +26,7 @@ class TenantNotFoundError(CogneeApiError): super().__init__(message, name, status_code) -class UserNotFoundError(CogneeApiError): +class UserNotFoundError(CogneeValidationError): """User not found""" def __init__( @@ -38,7 +38,7 @@ class UserNotFoundError(CogneeApiError): super().__init__(message, name, status_code) -class PermissionDeniedError(CogneeApiError): +class PermissionDeniedError(CogneeValidationError): def __init__( self, message: str = "User does not have permission on documents.", @@ -48,7 +48,7 @@ class PermissionDeniedError(CogneeApiError): super().__init__(message, name, status_code) -class PermissionNotFoundError(CogneeApiError): +class 
PermissionNotFoundError(CogneeValidationError): def __init__( self, message: str = "Permission type does not exist.", diff --git a/cognee/shared/exceptions/exceptions.py b/cognee/shared/exceptions/exceptions.py index 4b4164995..43084e04b 100644 --- a/cognee/shared/exceptions/exceptions.py +++ b/cognee/shared/exceptions/exceptions.py @@ -1,8 +1,8 @@ -from cognee.exceptions import CogneeApiError +from cognee.exceptions import CogneeValidationError from fastapi import status -class IngestionError(CogneeApiError): +class IngestionError(CogneeValidationError): def __init__( self, message: str = "Failed to load data.", diff --git a/cognee/tasks/completion/exceptions/exceptions.py b/cognee/tasks/completion/exceptions/exceptions.py index ac105a966..5052e2a77 100644 --- a/cognee/tasks/completion/exceptions/exceptions.py +++ b/cognee/tasks/completion/exceptions/exceptions.py @@ -1,8 +1,8 @@ -from cognee.exceptions import CogneeApiError +from cognee.exceptions import CogneeValidationError from fastapi import status -class NoRelevantDataError(CogneeApiError): +class NoRelevantDataError(CogneeValidationError): """ Represents an error when no relevant data is found during a search. This class is a subclass of CogneeApiError. 
From da40365932ce7056719dd323d6c721214e6919fa Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:15:39 +0200 Subject: [PATCH 22/51] ruff formatting --- cognee/api/v1/exceptions/__init__.py | 2 +- cognee/api/v1/exceptions/exceptions.py | 9 +++++---- cognee/exceptions/exceptions.py | 1 - cognee/infrastructure/data/exceptions/__init__.py | 4 +--- cognee/infrastructure/databases/exceptions/__init__.py | 2 +- cognee/infrastructure/databases/exceptions/exceptions.py | 8 +++++--- cognee/infrastructure/llm/exceptions.py | 3 +++ .../litellm_instructor/llm/openai/adapter.py | 5 ++++- cognee/modules/graph/cognee_graph/CogneeGraph.py | 6 +++++- cognee/modules/graph/cognee_graph/CogneeGraphElements.py | 1 + cognee/modules/graph/exceptions/exceptions.py | 2 ++ cognee/modules/search/exceptions/__init__.py | 4 +--- .../unit/modules/graph/cognee_graph_elements_test.py | 1 + 13 files changed, 30 insertions(+), 18 deletions(-) diff --git a/cognee/api/v1/exceptions/__init__.py b/cognee/api/v1/exceptions/__init__.py index b2948934c..5767a5801 100644 --- a/cognee/api/v1/exceptions/__init__.py +++ b/cognee/api/v1/exceptions/__init__.py @@ -9,5 +9,5 @@ from .exceptions import ( DocumentNotFoundError, DatasetNotFoundError, DataNotFoundError, - DocumentSubgraphNotFoundError + DocumentSubgraphNotFoundError, ) diff --git a/cognee/api/v1/exceptions/exceptions.py b/cognee/api/v1/exceptions/exceptions.py index 8b65691f1..12dfa9d37 100644 --- a/cognee/api/v1/exceptions/exceptions.py +++ b/cognee/api/v1/exceptions/exceptions.py @@ -1,7 +1,4 @@ -from cognee.exceptions import ( - CogneeConfigurationError, - CogneeValidationError -) +from cognee.exceptions import CogneeConfigurationError, CogneeValidationError from fastapi import status @@ -15,6 +12,7 @@ class InvalidConfigAttributeError(CogneeConfigurationError): message = f"'{attribute}' is not a valid attribute of the configuration." 
super().__init__(message, name, status_code) + class DocumentNotFoundError(CogneeValidationError): def __init__( self, @@ -24,6 +22,7 @@ class DocumentNotFoundError(CogneeValidationError): ): super().__init__(message, name, status_code) + class DatasetNotFoundError(CogneeValidationError): def __init__( self, @@ -33,6 +32,7 @@ class DatasetNotFoundError(CogneeValidationError): ): super().__init__(message, name, status_code) + class DataNotFoundError(CogneeValidationError): def __init__( self, @@ -42,6 +42,7 @@ class DataNotFoundError(CogneeValidationError): ): super().__init__(message, name, status_code) + class DocumentSubgraphNotFoundError(CogneeValidationError): def __init__( self, diff --git a/cognee/exceptions/exceptions.py b/cognee/exceptions/exceptions.py index 9b6cef21d..d956d9cef 100644 --- a/cognee/exceptions/exceptions.py +++ b/cognee/exceptions/exceptions.py @@ -89,4 +89,3 @@ class CogneeTransientError(CogneeApiError): log_level="ERROR", ): super().__init__(message, name, status_code, log, log_level) - diff --git a/cognee/infrastructure/data/exceptions/__init__.py b/cognee/infrastructure/data/exceptions/__init__.py index 6735200ed..cc5f6044f 100644 --- a/cognee/infrastructure/data/exceptions/__init__.py +++ b/cognee/infrastructure/data/exceptions/__init__.py @@ -4,6 +4,4 @@ Custom exceptions for the Cognee API. 
This module defines a set of exceptions for handling various data errors """ -from .exceptions import ( - KeywordExtractionError -) +from .exceptions import KeywordExtractionError diff --git a/cognee/infrastructure/databases/exceptions/__init__.py b/cognee/infrastructure/databases/exceptions/__init__.py index 56deaac74..2969b1c59 100644 --- a/cognee/infrastructure/databases/exceptions/__init__.py +++ b/cognee/infrastructure/databases/exceptions/__init__.py @@ -10,5 +10,5 @@ from .exceptions import ( DatabaseNotCreatedError, EmbeddingException, MissingQueryParameterError, - MutuallyExclusiveQueryParametersError + MutuallyExclusiveQueryParametersError, ) diff --git a/cognee/infrastructure/databases/exceptions/exceptions.py b/cognee/infrastructure/databases/exceptions/exceptions.py index 6e1bb74f6..c240d3133 100644 --- a/cognee/infrastructure/databases/exceptions/exceptions.py +++ b/cognee/infrastructure/databases/exceptions/exceptions.py @@ -103,11 +103,13 @@ class EmbeddingException(CogneeConfigurationError): ): super().__init__(message, name, status_code) + class MissingQueryParameterError(CogneeValidationError): """ Raised when neither 'query_text' nor 'query_vector' is provided, and at least one is required to perform the operation. """ + def __init__( self, name: str = "MissingQueryParameterError", @@ -116,17 +118,17 @@ class MissingQueryParameterError(CogneeValidationError): message = "One of query_text or query_vector must be provided!" super().__init__(message, name, status_code) + class MutuallyExclusiveQueryParametersError(CogneeValidationError): """ Raised when both 'text' and 'embedding' are provided to the search function, but only one type of input is allowed at a time. """ + def __init__( self, name: str = "MutuallyExclusiveQueryParametersError", status_code: int = status.HTTP_400_BAD_REQUEST, ): - message = ( - "The search function accepts either text or embedding as input, but not both." 
- ) + message = "The search function accepts either text or embedding as input, but not both." super().__init__(message, name, status_code) diff --git a/cognee/infrastructure/llm/exceptions.py b/cognee/infrastructure/llm/exceptions.py index c8f5726fb..1d390a951 100644 --- a/cognee/infrastructure/llm/exceptions.py +++ b/cognee/infrastructure/llm/exceptions.py @@ -9,6 +9,7 @@ class LLMAPIKeyNotSetError(CogneeValidationError): """ Raised when the LLM API key is not set in the configuration. """ + def __init__(self, message: str = "LLM API key is not set."): super().__init__(message=message, name="LLMAPIKeyNotSetError") @@ -17,10 +18,12 @@ class UnsupportedLLMProviderError(CogneeValidationError): """ Raised when an unsupported LLM provider is specified in the configuration. """ + def __init__(self, provider: str): message = f"Unsupported LLM provider: {provider}" super().__init__(message=message, name="UnsupportedLLMProviderError") + class MissingSystemPromptPathError(CogneeValidationError): def __init__( self, diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 4126b3e13..c3c215896 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -11,7 +11,10 @@ from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) -from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError, MissingSystemPromptPathError +from cognee.infrastructure.llm.exceptions import ( + ContentPolicyFilterError, + MissingSystemPromptPathError, +) from cognee.infrastructure.files.utils.open_data_file import open_data_file from 
cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( rate_limit_async, diff --git a/cognee/modules/graph/cognee_graph/CogneeGraph.py b/cognee/modules/graph/cognee_graph/CogneeGraph.py index bdafaf238..ed867ae24 100644 --- a/cognee/modules/graph/cognee_graph/CogneeGraph.py +++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py @@ -2,7 +2,11 @@ import time from cognee.shared.logging_utils import get_logger from typing import List, Dict, Union, Optional, Type -from cognee.modules.graph.exceptions import EntityNotFoundError, EntityAlreadyExistsError, InvalidDimensionsError +from cognee.modules.graph.exceptions import ( + EntityNotFoundError, + EntityAlreadyExistsError, + InvalidDimensionsError, +) from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph diff --git a/cognee/modules/graph/cognee_graph/CogneeGraphElements.py b/cognee/modules/graph/cognee_graph/CogneeGraphElements.py index 2d8917d4a..0ca9c4fb9 100644 --- a/cognee/modules/graph/cognee_graph/CogneeGraphElements.py +++ b/cognee/modules/graph/cognee_graph/CogneeGraphElements.py @@ -2,6 +2,7 @@ import numpy as np from typing import List, Dict, Optional, Any, Union from cognee.modules.graph.exceptions import InvalidDimensionsError, DimensionOutOfRangeError + class Node: """ Represents a node in a graph. 
diff --git a/cognee/modules/graph/exceptions/exceptions.py b/cognee/modules/graph/exceptions/exceptions.py index 6fb2d400d..67f4200ff 100644 --- a/cognee/modules/graph/exceptions/exceptions.py +++ b/cognee/modules/graph/exceptions/exceptions.py @@ -25,6 +25,7 @@ class EntityAlreadyExistsError(CogneeValidationError): ): super().__init__(message, name, status_code) + class InvalidDimensionsError(CogneeValidationError): def __init__( self, @@ -34,6 +35,7 @@ class InvalidDimensionsError(CogneeValidationError): message = "Dimensions must be positive integers." super().__init__(message, name, status_code) + class DimensionOutOfRangeError(CogneeValidationError): def __init__( self, diff --git a/cognee/modules/search/exceptions/__init__.py b/cognee/modules/search/exceptions/__init__.py index ffb30f428..a019da249 100644 --- a/cognee/modules/search/exceptions/__init__.py +++ b/cognee/modules/search/exceptions/__init__.py @@ -4,6 +4,4 @@ Custom exceptions for the Cognee API. This module defines a set of exceptions for handling various data errors """ -from .exceptions import ( - UnsupportedSearchTypeError -) +from .exceptions import UnsupportedSearchTypeError diff --git a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py index a8fb17268..37ba113b5 100644 --- a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +++ b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py @@ -4,6 +4,7 @@ import pytest from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node from cognee.modules.graph.exceptions import InvalidDimensionsError, DimensionOutOfRangeError + def test_node_initialization(): """Test that a Node is initialized correctly.""" node = Node("node1", {"attr1": "value1"}, dimension=2) From 7af4775c562265a87c5da92c72aeee0789a84fa9 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:17:01 +0200 Subject: [PATCH 
23/51] chore: changed pypdf error base class --- .../data/processing/document_types/exceptions/exceptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/modules/data/processing/document_types/exceptions/exceptions.py b/cognee/modules/data/processing/document_types/exceptions/exceptions.py index b5126a8a7..a05e64d60 100644 --- a/cognee/modules/data/processing/document_types/exceptions/exceptions.py +++ b/cognee/modules/data/processing/document_types/exceptions/exceptions.py @@ -1,8 +1,8 @@ -from cognee.exceptions import CogneeApiError +from cognee.exceptions import CogneeSystemError from fastapi import status -class PyPdfInternalError(CogneeApiError): +class PyPdfInternalError(CogneeSystemError): """Internal pypdf error""" def __init__( From 66d2c7512811bcb38ed9ffa31d81753d1a916d2a Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:20:24 +0200 Subject: [PATCH 24/51] chore: changing docstring --- cognee/tasks/completion/exceptions/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tasks/completion/exceptions/exceptions.py b/cognee/tasks/completion/exceptions/exceptions.py index 5052e2a77..e5ab5b67c 100644 --- a/cognee/tasks/completion/exceptions/exceptions.py +++ b/cognee/tasks/completion/exceptions/exceptions.py @@ -5,7 +5,7 @@ from fastapi import status class NoRelevantDataError(CogneeValidationError): """ Represents an error when no relevant data is found during a search. This class is a - subclass of CogneeApiError. + subclass of CogneeValidationError. 
Public methods: From 8bd2416bc75b0037915098d69cd24b953ce65ec9 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:36:17 +0200 Subject: [PATCH 25/51] chore: fixing delete exception import in dataset router --- cognee/api/v1/datasets/routers/get_datasets_router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py index 627be226d..a6938d764 100644 --- a/cognee/api/v1/datasets/routers/get_datasets_router.py +++ b/cognee/api/v1/datasets/routers/get_datasets_router.py @@ -13,7 +13,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.data.methods import get_authorized_existing_datasets from cognee.modules.data.methods import create_dataset, get_datasets_by_name from cognee.shared.logging_utils import get_logger -from cognee.api.v1.delete.exceptions import DataNotFoundError, DatasetNotFoundError +from cognee.api.v1.exceptions import DataNotFoundError, DatasetNotFoundError from cognee.modules.users.models import User from cognee.modules.users.methods import get_authenticated_user from cognee.modules.users.permissions.methods import ( From d14d31adbe9f32e92c625909b9d8462156c6c912 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:39:11 +0200 Subject: [PATCH 26/51] chore: updating delete_by_id test --- cognee/tests/test_delete_by_id.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tests/test_delete_by_id.py b/cognee/tests/test_delete_by_id.py index 36e0a57f5..8fc5395eb 100644 --- a/cognee/tests/test_delete_by_id.py +++ b/cognee/tests/test_delete_by_id.py @@ -7,7 +7,7 @@ from cognee.shared.logging_utils import get_logger from cognee.modules.users.methods import get_default_user, create_user from cognee.modules.users.permissions.methods import 
authorized_give_permission_on_datasets from cognee.modules.data.methods import get_dataset_data, get_datasets_by_name -from cognee.api.v1.delete.exceptions import DocumentNotFoundError, DatasetNotFoundError +from cognee.api.v1.exceptions import DocumentNotFoundError, DatasetNotFoundError logger = get_logger() From 748e9fad86d924edb9d78d00f22242c9d936b9d5 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 16:17:58 +0200 Subject: [PATCH 27/51] feat: adds s3 file system not found error to ingestion --- cognee/tasks/ingestion/exceptions/__init__.py | 10 ++++++++++ cognee/tasks/ingestion/exceptions/exceptions.py | 12 ++++++++++++ cognee/tasks/ingestion/resolve_data_directories.py | 5 +++++ 3 files changed, 27 insertions(+) create mode 100644 cognee/tasks/ingestion/exceptions/__init__.py create mode 100644 cognee/tasks/ingestion/exceptions/exceptions.py diff --git a/cognee/tasks/ingestion/exceptions/__init__.py b/cognee/tasks/ingestion/exceptions/__init__.py new file mode 100644 index 000000000..5c0a0b02f --- /dev/null +++ b/cognee/tasks/ingestion/exceptions/__init__.py @@ -0,0 +1,10 @@ +""" +Custom exceptions for the Cognee API. + +This module defines a set of exceptions for handling various application errors, +such as System, Validation, Configuration or TransientErrors +""" + +from .exceptions import ( + S3FileSystemNotFoundError +) diff --git a/cognee/tasks/ingestion/exceptions/exceptions.py b/cognee/tasks/ingestion/exceptions/exceptions.py new file mode 100644 index 000000000..9d07b9ab3 --- /dev/null +++ b/cognee/tasks/ingestion/exceptions/exceptions.py @@ -0,0 +1,12 @@ +from cognee.exceptions import CogneeSystemError +from fastapi import status + + +class S3FileSystemNotFoundError(CogneeSystemError): + def __init__( + self, + name: str = "S3FileSystemNotFoundError", + status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR, + ): + message = "Could not find S3FileSystem." 
+ super().__init__(message, name, status_code) diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py index 0f2f2a85f..1d3124a0c 100644 --- a/cognee/tasks/ingestion/resolve_data_directories.py +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -1,6 +1,9 @@ import os from urllib.parse import urlparse from typing import List, Union, BinaryIO + +from cognee.tasks.ingestion.exceptions import S3FileSystemNotFoundError +from cognee.exceptions import CogneeSystemError from cognee.infrastructure.files.storage.s3_config import get_s3_config @@ -54,6 +57,8 @@ async def resolve_data_directories( else: s3_files.append(key) resolved_data.extend(s3_files) + else: + raise S3FileSystemNotFoundError() elif os.path.isdir(item): # If it's a directory if include_subdirectories: From dbb967fda8ef5e09c0b0b7be102d17ab2219fc64 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 16:28:05 +0200 Subject: [PATCH 28/51] chore: updates neptune exception base + ruff --- .../graph/neptune_driver/exceptions.py | 23 +++++++++++-------- cognee/tasks/ingestion/exceptions/__init__.py | 4 +--- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/cognee/infrastructure/databases/graph/neptune_driver/exceptions.py b/cognee/infrastructure/databases/graph/neptune_driver/exceptions.py index d78936202..a200aad39 100644 --- a/cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +++ b/cognee/infrastructure/databases/graph/neptune_driver/exceptions.py @@ -3,7 +3,12 @@ This module defines custom exceptions for Neptune Analytics operations. 
""" -from cognee.exceptions import CogneeSystemError +from cognee.exceptions import ( + CogneeSystemError, + CogneeTransientError, + CogneeValidationError, + CogneeConfigurationError, +) from fastapi import status @@ -19,7 +24,7 @@ class NeptuneAnalyticsError(CogneeSystemError): super().__init__(message, name, status_code) -class NeptuneAnalyticsConnectionError(NeptuneAnalyticsError): +class NeptuneAnalyticsConnectionError(CogneeTransientError): """Exception raised when connection to Neptune Analytics fails.""" def __init__( @@ -31,7 +36,7 @@ class NeptuneAnalyticsConnectionError(NeptuneAnalyticsError): super().__init__(message, name, status_code) -class NeptuneAnalyticsQueryError(NeptuneAnalyticsError): +class NeptuneAnalyticsQueryError(CogneeValidationError): """Exception raised when a query execution fails.""" def __init__( @@ -43,7 +48,7 @@ class NeptuneAnalyticsQueryError(NeptuneAnalyticsError): super().__init__(message, name, status_code) -class NeptuneAnalyticsAuthenticationError(NeptuneAnalyticsError): +class NeptuneAnalyticsAuthenticationError(CogneeConfigurationError): """Exception raised when authentication with Neptune Analytics fails.""" def __init__( @@ -55,7 +60,7 @@ class NeptuneAnalyticsAuthenticationError(NeptuneAnalyticsError): super().__init__(message, name, status_code) -class NeptuneAnalyticsConfigurationError(NeptuneAnalyticsError): +class NeptuneAnalyticsConfigurationError(CogneeConfigurationError): """Exception raised when Neptune Analytics configuration is invalid.""" def __init__( @@ -67,7 +72,7 @@ class NeptuneAnalyticsConfigurationError(NeptuneAnalyticsError): super().__init__(message, name, status_code) -class NeptuneAnalyticsTimeoutError(NeptuneAnalyticsError): +class NeptuneAnalyticsTimeoutError(CogneeTransientError): """Exception raised when a Neptune Analytics operation times out.""" def __init__( @@ -79,7 +84,7 @@ class NeptuneAnalyticsTimeoutError(NeptuneAnalyticsError): super().__init__(message, name, status_code) -class 
NeptuneAnalyticsThrottlingError(NeptuneAnalyticsError): +class NeptuneAnalyticsThrottlingError(CogneeTransientError): """Exception raised when requests are throttled by Neptune Analytics.""" def __init__( @@ -91,7 +96,7 @@ class NeptuneAnalyticsThrottlingError(NeptuneAnalyticsError): super().__init__(message, name, status_code) -class NeptuneAnalyticsResourceNotFoundError(NeptuneAnalyticsError): +class NeptuneAnalyticsResourceNotFoundError(CogneeValidationError): """Exception raised when a Neptune Analytics resource is not found.""" def __init__( @@ -103,7 +108,7 @@ class NeptuneAnalyticsResourceNotFoundError(NeptuneAnalyticsError): super().__init__(message, name, status_code) -class NeptuneAnalyticsInvalidParameterError(NeptuneAnalyticsError): +class NeptuneAnalyticsInvalidParameterError(CogneeValidationError): """Exception raised when invalid parameters are provided to Neptune Analytics.""" def __init__( diff --git a/cognee/tasks/ingestion/exceptions/__init__.py b/cognee/tasks/ingestion/exceptions/__init__.py index 5c0a0b02f..f24792d1f 100644 --- a/cognee/tasks/ingestion/exceptions/__init__.py +++ b/cognee/tasks/ingestion/exceptions/__init__.py @@ -5,6 +5,4 @@ This module defines a set of exceptions for handling various application errors, such as System, Validation, Configuration or TransientErrors """ -from .exceptions import ( - S3FileSystemNotFoundError -) +from .exceptions import S3FileSystemNotFoundError From c99b453d965df490b02e370710bb94070f97993d Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 14 Aug 2025 10:57:16 +0200 Subject: [PATCH 29/51] feat: adds WrongDataDocumentError to classify documents --- cognee/tasks/documents/classify_documents.py | 5 +++++ cognee/tasks/documents/exceptions/__init__.py | 9 +++++++++ cognee/tasks/documents/exceptions/exceptions.py | 17 +++++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 cognee/tasks/documents/exceptions/__init__.py create mode 100644 
cognee/tasks/documents/exceptions/exceptions.py diff --git a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py index 673e17c75..6567e5825 100644 --- a/cognee/tasks/documents/classify_documents.py +++ b/cognee/tasks/documents/classify_documents.py @@ -10,6 +10,7 @@ from cognee.modules.data.processing.document_types import ( ) from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.engine.utils.generate_node_id import generate_node_id +from cognee.tasks.documents.exceptions import WrongDataDocumentInputError EXTENSION_TO_DOCUMENT_CLASS = { "pdf": PdfDocument, # Text documents @@ -111,8 +112,12 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]: - list[Document]: A list of Document objects created based on the classified data documents. """ + if not isinstance(data_documents, list): + raise WrongDataDocumentInputError("data_documents") + documents = [] for data_item in data_documents: + document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension]( id=data_item.id, title=f"{data_item.name}.{data_item.extension}", diff --git a/cognee/tasks/documents/exceptions/__init__.py b/cognee/tasks/documents/exceptions/__init__.py new file mode 100644 index 000000000..cdd50c6b0 --- /dev/null +++ b/cognee/tasks/documents/exceptions/__init__.py @@ -0,0 +1,9 @@ +""" +Custom exceptions for the Cognee API. 
+ +This module defines a set of exceptions for handling various data errors +""" + +from .exceptions import ( + WrongDataDocumentInputError, +) diff --git a/cognee/tasks/documents/exceptions/exceptions.py b/cognee/tasks/documents/exceptions/exceptions.py new file mode 100644 index 000000000..a1fcb1d4d --- /dev/null +++ b/cognee/tasks/documents/exceptions/exceptions.py @@ -0,0 +1,17 @@ +from cognee.exceptions import ( + CogneeValidationError, + CogneeConfigurationError, +) +from fastapi import status + + +class WrongDataDocumentInputError(CogneeValidationError): + """Raised when a wrong data document is provided.""" + def __init__( + self, + field: str, + name: str = "WrongDataDocumentInputError", + status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY, + ): + message = f"Missing of invalid parameter: '{field}'." + super().__init__(message, name, status_code) \ No newline at end of file From df3a3df117026aea7bab81521e9c06b96f152bb7 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 14 Aug 2025 13:12:08 +0200 Subject: [PATCH 30/51] feat: adds errors to classify, and chunking top level --- cognee/tasks/documents/classify_documents.py | 1 - cognee/tasks/documents/exceptions/__init__.py | 2 ++ .../tasks/documents/exceptions/exceptions.py | 21 ++++++++++++++++++- .../extract_chunks_from_documents.py | 10 +++++++-- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py index 6567e5825..9fa512906 100644 --- a/cognee/tasks/documents/classify_documents.py +++ b/cognee/tasks/documents/classify_documents.py @@ -117,7 +117,6 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]: documents = [] for data_item in data_documents: - document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension]( id=data_item.id, title=f"{data_item.name}.{data_item.extension}", diff --git a/cognee/tasks/documents/exceptions/__init__.py 
b/cognee/tasks/documents/exceptions/__init__.py index cdd50c6b0..a8602d6f5 100644 --- a/cognee/tasks/documents/exceptions/__init__.py +++ b/cognee/tasks/documents/exceptions/__init__.py @@ -6,4 +6,6 @@ This module defines a set of exceptions for handling various data errors from .exceptions import ( WrongDataDocumentInputError, + InvalidChunkSizeError, + InvalidChunkerError, ) diff --git a/cognee/tasks/documents/exceptions/exceptions.py b/cognee/tasks/documents/exceptions/exceptions.py index a1fcb1d4d..27907aaf1 100644 --- a/cognee/tasks/documents/exceptions/exceptions.py +++ b/cognee/tasks/documents/exceptions/exceptions.py @@ -7,6 +7,7 @@ from fastapi import status class WrongDataDocumentInputError(CogneeValidationError): """Raised when a wrong data document is provided.""" + def __init__( self, field: str, @@ -14,4 +15,22 @@ class WrongDataDocumentInputError(CogneeValidationError): status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY, ): message = f"Missing of invalid parameter: '{field}'." 
- super().__init__(message, name, status_code) \ No newline at end of file + super().__init__(message, name, status_code) + + +class InvalidChunkSizeError(CogneeValidationError): + def __init__(self, value): + super().__init__( + message=f"max_chunk_size must be a positive integer (got {value}).", + name="InvalidChunkSizeError", + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +class InvalidChunkerError(CogneeValidationError): + def __init__(self): + super().__init__( + message=f"chunker must be a valid Chunker class.", + name="InvalidChunkerError", + status_code=status.HTTP_400_BAD_REQUEST, + ) diff --git a/cognee/tasks/documents/extract_chunks_from_documents.py b/cognee/tasks/documents/extract_chunks_from_documents.py index 216185495..d52380c8d 100644 --- a/cognee/tasks/documents/extract_chunks_from_documents.py +++ b/cognee/tasks/documents/extract_chunks_from_documents.py @@ -8,6 +8,7 @@ from cognee.modules.data.models import Data from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.chunking.TextChunker import TextChunker from cognee.modules.chunking.Chunker import Chunker +from cognee.tasks.documents.exceptions import InvalidChunkSizeError, InvalidChunkerError async def update_document_token_count(document_id: UUID, token_count: int) -> None: @@ -37,6 +38,13 @@ async def extract_chunks_from_documents( - The `read` method of the `Document` class must be implemented to support the chunking operation. - The `chunker` parameter determines the chunking logic and should align with the document type. 
""" + if not isinstance(max_chunk_size, int) or max_chunk_size <= 0: + raise InvalidChunkSizeError(max_chunk_size) + if not isinstance(chunker, type): + raise InvalidChunkerError() + if not hasattr(chunker, "read"): + raise InvalidChunkerError() + for document in documents: document_token_count = 0 @@ -48,5 +56,3 @@ async def extract_chunks_from_documents( yield document_chunk await update_document_token_count(document.id, document_token_count) - - # todo rita From 9f965c44b4cdebda8129a7fdd487d96d27a1a5ea Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 14 Aug 2025 13:53:39 +0200 Subject: [PATCH 31/51] feat: adds input checks for extract graph from data --- cognee/tasks/graph/exceptions/__init__.py | 12 ++++++ cognee/tasks/graph/exceptions/exceptions.py | 37 +++++++++++++++++++ cognee/tasks/graph/extract_graph_from_data.py | 25 ++++++++++++- 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 cognee/tasks/graph/exceptions/__init__.py create mode 100644 cognee/tasks/graph/exceptions/exceptions.py diff --git a/cognee/tasks/graph/exceptions/__init__.py b/cognee/tasks/graph/exceptions/__init__.py new file mode 100644 index 000000000..3516ba151 --- /dev/null +++ b/cognee/tasks/graph/exceptions/__init__.py @@ -0,0 +1,12 @@ +""" +Custom exceptions for the Cognee API. 
+ +This module defines a set of exceptions for handling various data errors +""" + +from .exceptions import ( + InvalidDataChunksError, + InvalidGraphModelError, + InvalidOntologyAdapterError, + InvalidChunkGraphInputError +) diff --git a/cognee/tasks/graph/exceptions/exceptions.py b/cognee/tasks/graph/exceptions/exceptions.py new file mode 100644 index 000000000..0708bab31 --- /dev/null +++ b/cognee/tasks/graph/exceptions/exceptions.py @@ -0,0 +1,37 @@ +from cognee.exceptions import ( + CogneeValidationError, + CogneeConfigurationError, +) +from fastapi import status + + +class InvalidDataChunksError(CogneeValidationError): + def __init__(self, detail: str): + super().__init__( + message=f"Invalid data_chunks: {detail}", + name="InvalidDataChunksError", + status_code=status.HTTP_400_BAD_REQUEST, + ) + +class InvalidGraphModelError(CogneeValidationError): + def __init__(self, got): + super().__init__( + message=f"graph_model must be a subclass of BaseModel (got {got}).", + name="InvalidGraphModelError", + status_code=status.HTTP_400_BAD_REQUEST, + ) + +class InvalidOntologyAdapterError(CogneeConfigurationError): + def __init__(self, got): + super().__init__( + message=f"ontology_adapter lacks required interface (got {got}).", + name="InvalidOntologyAdapterError", + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR) + + +class InvalidChunkGraphInputError(CogneeValidationError): + def __init__(self, detail: str): + super().__init__( + message=f"Invalid chunk inputs or LLM Chunkgraphs: {detail}", + name="InvalidChunkGraphInputError", + status_code=status.HTTP_400_BAD_REQUEST) \ No newline at end of file diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index aa415d504..2ddd85075 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -12,7 +12,12 @@ from cognee.modules.graph.utils import ( ) from cognee.shared.data_models import KnowledgeGraph from 
cognee.infrastructure.llm.LLMGateway import LLMGateway - +from cognee.tasks.graph.exceptions import ( + InvalidGraphModelError, + InvalidDataChunksError, + InvalidChunkGraphInputError, + InvalidOntologyAdapterError, +) async def integrate_chunk_graphs( data_chunks: list[DocumentChunk], @@ -21,6 +26,16 @@ async def integrate_chunk_graphs( ontology_adapter: OntologyResolver, ) -> List[DocumentChunk]: """Updates DocumentChunk objects, integrates data points and edges into databases.""" + + if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list): + raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.") + if len(data_chunks) != len(chunk_graphs): + raise InvalidChunkGraphInputError(f"length mismatch: {len(data_chunks)} chunks vs {len(chunk_graphs)} graphs.") + if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel): + raise InvalidGraphModelError(graph_model) + if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"): + raise InvalidOntologyAdapterError(type(ontology_adapter).__name__ if ontology_adapter else "None") + graph_engine = await get_graph_engine() if graph_model is not KnowledgeGraph: @@ -55,6 +70,14 @@ async def extract_graph_from_data( """ Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model. 
""" + + if not isinstance(data_chunks, list) or not data_chunks: + raise InvalidDataChunksError("must be a non-empty list of DocumentChunk.") + if not all(hasattr(c, "text") for c in data_chunks): + raise InvalidDataChunksError("each chunk must have a 'text' attribute") + if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel): + raise InvalidGraphModelError(graph_model) + chunk_graphs = await asyncio.gather( *[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks] ) From 63d071f0d86d54246e85eb112f24aea7263db86c Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 14 Aug 2025 14:17:13 +0200 Subject: [PATCH 32/51] feat: adds input checks for add datapoints and summarization tasks --- cognee/tasks/storage/add_data_points.py | 9 +++++++++ cognee/tasks/storage/exceptions/__init__.py | 9 +++++++++ cognee/tasks/storage/exceptions/exceptions.py | 13 +++++++++++++ cognee/tasks/summarization/exceptions/__init__.py | 9 +++++++++ cognee/tasks/summarization/exceptions/exceptions.py | 13 +++++++++++++ cognee/tasks/summarization/summarize_text.py | 10 +++++++++- 6 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 cognee/tasks/storage/exceptions/__init__.py create mode 100644 cognee/tasks/storage/exceptions/exceptions.py create mode 100644 cognee/tasks/summarization/exceptions/__init__.py create mode 100644 cognee/tasks/summarization/exceptions/exceptions.py diff --git a/cognee/tasks/storage/add_data_points.py b/cognee/tasks/storage/add_data_points.py index 9b5c36c37..41bda954f 100644 --- a/cognee/tasks/storage/add_data_points.py +++ b/cognee/tasks/storage/add_data_points.py @@ -5,9 +5,18 @@ from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.graph.utils import deduplicate_nodes_and_edges, get_graph_from_model from .index_data_points import index_data_points from .index_graph_edges import index_graph_edges +from 
cognee.tasks.storage.exceptions import ( + InvalidDataPointsInAddDataPointsError, +) async def add_data_points(data_points: List[DataPoint]) -> List[DataPoint]: + + if not isinstance(data_points, list): + raise InvalidDataPointsInAddDataPointsError("data_points must be a list.") + if not all(isinstance(dp, DataPoint) for dp in data_points): + raise InvalidDataPointsInAddDataPointsError("data_points: each item must be a DataPoint.") + nodes = [] edges = [] diff --git a/cognee/tasks/storage/exceptions/__init__.py b/cognee/tasks/storage/exceptions/__init__.py new file mode 100644 index 000000000..33dd215d6 --- /dev/null +++ b/cognee/tasks/storage/exceptions/__init__.py @@ -0,0 +1,9 @@ +""" +Custom exceptions for the Cognee API. + +This module defines a set of exceptions for handling various data errors +""" + +from .exceptions import ( + InvalidDataPointsInAddDataPointsError, +) diff --git a/cognee/tasks/storage/exceptions/exceptions.py b/cognee/tasks/storage/exceptions/exceptions.py new file mode 100644 index 000000000..2da6a84aa --- /dev/null +++ b/cognee/tasks/storage/exceptions/exceptions.py @@ -0,0 +1,13 @@ +from cognee.exceptions import ( + CogneeValidationError, +) +from fastapi import status + + +class InvalidDataPointsInAddDataPointsError(CogneeValidationError): + def __init__(self, detail: str): + super().__init__( + message=f"Invalid data_points: {detail}", + name="InvalidDataPointsInAddDataPointsError", + status_code=status.HTTP_400_BAD_REQUEST) + diff --git a/cognee/tasks/summarization/exceptions/__init__.py b/cognee/tasks/summarization/exceptions/__init__.py new file mode 100644 index 000000000..502f973e7 --- /dev/null +++ b/cognee/tasks/summarization/exceptions/__init__.py @@ -0,0 +1,9 @@ +""" +Custom exceptions for the Cognee API. 
+ +This module defines a set of exceptions for handling various data errors +""" + +from .exceptions import ( + InvalidSummaryInputsError, +) diff --git a/cognee/tasks/summarization/exceptions/exceptions.py b/cognee/tasks/summarization/exceptions/exceptions.py new file mode 100644 index 000000000..d0c9cfec6 --- /dev/null +++ b/cognee/tasks/summarization/exceptions/exceptions.py @@ -0,0 +1,13 @@ +from cognee.exceptions import ( + CogneeValidationError, + CogneeConfigurationError, +) +from fastapi import status + + +class InvalidSummaryInputsError(CogneeValidationError): + def __init__(self, detail: str): + super().__init__( + message=f"Invalid summarize_text inputs: {detail}", + name="InvalidSummaryInputsError", + status_code=status.HTTP_400_BAD_REQUEST) \ No newline at end of file diff --git a/cognee/tasks/summarization/summarize_text.py b/cognee/tasks/summarization/summarize_text.py index ca6964f83..b482b5b4a 100644 --- a/cognee/tasks/summarization/summarize_text.py +++ b/cognee/tasks/summarization/summarize_text.py @@ -3,10 +3,11 @@ from typing import Type from uuid import uuid5 from pydantic import BaseModel +from cognee.tasks.summarization.exceptions import InvalidSummaryInputsError from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.modules.cognify.config import get_cognify_config -from .models import TextSummary +from cognee.tasks.summarization.models import TextSummary async def summarize_text( @@ -35,6 +36,13 @@ async def summarize_text( A list of TextSummary objects, each containing the summary of a corresponding DocumentChunk. 
""" + + if not isinstance(data_chunks, list): + raise InvalidSummaryInputsError("data_chunks must be a list.") + if not all(hasattr(c, "text") for c in data_chunks): + raise InvalidSummaryInputsError("each DocumentChunk must have a 'text' attribute.") + + if len(data_chunks) == 0: return data_chunks From affbc557d266c45dc5794c09e3c4e5bbab66097d Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 14 Aug 2025 14:17:35 +0200 Subject: [PATCH 33/51] chore: ruff formatting --- cognee/tasks/graph/exceptions/__init__.py | 2 +- cognee/tasks/graph/exceptions/exceptions.py | 8 ++++++-- cognee/tasks/graph/extract_graph_from_data.py | 9 +++++++-- cognee/tasks/storage/add_data_points.py | 1 - cognee/tasks/storage/exceptions/exceptions.py | 4 ++-- cognee/tasks/summarization/exceptions/exceptions.py | 3 ++- cognee/tasks/summarization/summarize_text.py | 1 - 7 files changed, 18 insertions(+), 10 deletions(-) diff --git a/cognee/tasks/graph/exceptions/__init__.py b/cognee/tasks/graph/exceptions/__init__.py index 3516ba151..d91bbbd36 100644 --- a/cognee/tasks/graph/exceptions/__init__.py +++ b/cognee/tasks/graph/exceptions/__init__.py @@ -8,5 +8,5 @@ from .exceptions import ( InvalidDataChunksError, InvalidGraphModelError, InvalidOntologyAdapterError, - InvalidChunkGraphInputError + InvalidChunkGraphInputError, ) diff --git a/cognee/tasks/graph/exceptions/exceptions.py b/cognee/tasks/graph/exceptions/exceptions.py index 0708bab31..c09ee1c08 100644 --- a/cognee/tasks/graph/exceptions/exceptions.py +++ b/cognee/tasks/graph/exceptions/exceptions.py @@ -13,6 +13,7 @@ class InvalidDataChunksError(CogneeValidationError): status_code=status.HTTP_400_BAD_REQUEST, ) + class InvalidGraphModelError(CogneeValidationError): def __init__(self, got): super().__init__( @@ -21,12 +22,14 @@ class InvalidGraphModelError(CogneeValidationError): status_code=status.HTTP_400_BAD_REQUEST, ) + class InvalidOntologyAdapterError(CogneeConfigurationError): def 
__init__(self, got): super().__init__( message=f"ontology_adapter lacks required interface (got {got}).", name="InvalidOntologyAdapterError", - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR) + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) class InvalidChunkGraphInputError(CogneeValidationError): @@ -34,4 +37,5 @@ class InvalidChunkGraphInputError(CogneeValidationError): super().__init__( message=f"Invalid chunk inputs or LLM Chunkgraphs: {detail}", name="InvalidChunkGraphInputError", - status_code=status.HTTP_400_BAD_REQUEST) \ No newline at end of file + status_code=status.HTTP_400_BAD_REQUEST, + ) diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py index 2ddd85075..019e9e4a1 100644 --- a/cognee/tasks/graph/extract_graph_from_data.py +++ b/cognee/tasks/graph/extract_graph_from_data.py @@ -19,6 +19,7 @@ from cognee.tasks.graph.exceptions import ( InvalidOntologyAdapterError, ) + async def integrate_chunk_graphs( data_chunks: list[DocumentChunk], chunk_graphs: list, @@ -30,11 +31,15 @@ async def integrate_chunk_graphs( if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list): raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.") if len(data_chunks) != len(chunk_graphs): - raise InvalidChunkGraphInputError(f"length mismatch: {len(data_chunks)} chunks vs {len(chunk_graphs)} graphs.") + raise InvalidChunkGraphInputError( + f"length mismatch: {len(data_chunks)} chunks vs {len(chunk_graphs)} graphs." 
+ ) if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel): raise InvalidGraphModelError(graph_model) if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"): - raise InvalidOntologyAdapterError(type(ontology_adapter).__name__ if ontology_adapter else "None") + raise InvalidOntologyAdapterError( + type(ontology_adapter).__name__ if ontology_adapter else "None" + ) graph_engine = await get_graph_engine() diff --git a/cognee/tasks/storage/add_data_points.py b/cognee/tasks/storage/add_data_points.py index 41bda954f..6cdc90ac9 100644 --- a/cognee/tasks/storage/add_data_points.py +++ b/cognee/tasks/storage/add_data_points.py @@ -11,7 +11,6 @@ from cognee.tasks.storage.exceptions import ( async def add_data_points(data_points: List[DataPoint]) -> List[DataPoint]: - if not isinstance(data_points, list): raise InvalidDataPointsInAddDataPointsError("data_points must be a list.") if not all(isinstance(dp, DataPoint) for dp in data_points): diff --git a/cognee/tasks/storage/exceptions/exceptions.py b/cognee/tasks/storage/exceptions/exceptions.py index 2da6a84aa..9b2de9efd 100644 --- a/cognee/tasks/storage/exceptions/exceptions.py +++ b/cognee/tasks/storage/exceptions/exceptions.py @@ -9,5 +9,5 @@ class InvalidDataPointsInAddDataPointsError(CogneeValidationError): super().__init__( message=f"Invalid data_points: {detail}", name="InvalidDataPointsInAddDataPointsError", - status_code=status.HTTP_400_BAD_REQUEST) - + status_code=status.HTTP_400_BAD_REQUEST, + ) diff --git a/cognee/tasks/summarization/exceptions/exceptions.py b/cognee/tasks/summarization/exceptions/exceptions.py index d0c9cfec6..9e8e7197e 100644 --- a/cognee/tasks/summarization/exceptions/exceptions.py +++ b/cognee/tasks/summarization/exceptions/exceptions.py @@ -10,4 +10,5 @@ class InvalidSummaryInputsError(CogneeValidationError): super().__init__( message=f"Invalid summarize_text inputs: {detail}", name="InvalidSummaryInputsError", - 
status_code=status.HTTP_400_BAD_REQUEST) \ No newline at end of file + status_code=status.HTTP_400_BAD_REQUEST, + ) diff --git a/cognee/tasks/summarization/summarize_text.py b/cognee/tasks/summarization/summarize_text.py index b482b5b4a..f6dcc54a2 100644 --- a/cognee/tasks/summarization/summarize_text.py +++ b/cognee/tasks/summarization/summarize_text.py @@ -42,7 +42,6 @@ async def summarize_text( if not all(hasattr(c, "text") for c in data_chunks): raise InvalidSummaryInputsError("each DocumentChunk must have a 'text' attribute.") - if len(data_chunks) == 0: return data_chunks From a7d7e12d4cb808cb52b8f688718a25e351a65109 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 14 Aug 2025 14:48:35 +0200 Subject: [PATCH 34/51] ruff fix --- cognee/tasks/documents/exceptions/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tasks/documents/exceptions/exceptions.py b/cognee/tasks/documents/exceptions/exceptions.py index 27907aaf1..737e9a3d1 100644 --- a/cognee/tasks/documents/exceptions/exceptions.py +++ b/cognee/tasks/documents/exceptions/exceptions.py @@ -30,7 +30,7 @@ class InvalidChunkSizeError(CogneeValidationError): class InvalidChunkerError(CogneeValidationError): def __init__(self): super().__init__( - message=f"chunker must be a valid Chunker class.", + message="chunker must be a valid Chunker class.", name="InvalidChunkerError", status_code=status.HTTP_400_BAD_REQUEST, ) From c60627306f20b54a844b91b6753364dfa3e36998 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Thu, 14 Aug 2025 15:04:53 +0100 Subject: [PATCH 35/51] Refactor CI workflows to replace Poetry with uv for dependency management and execution --- .github/actions/cognee_setup/action.yml | 19 ++++--------- .github/workflows/basic_tests.yml | 10 +++---- .github/workflows/db_examples_tests.yml | 12 ++++---- .github/workflows/e2e_tests.yml | 28 +++++++++---------- .github/workflows/examples_tests.yml | 10 +++---- 
.github/workflows/graph_db_tests.yml | 12 ++++---- .github/workflows/python_version_tests.yml | 10 +++---- .../relational_db_migration_tests.yml | 12 ++++---- .github/workflows/reusable_notebook.yml | 4 +-- .github/workflows/search_db_tests.yml | 16 +++++------ .github/workflows/test_gemini.yml | 2 +- .github/workflows/test_ollama.yml | 4 +-- .github/workflows/test_s3_file_storage.yml | 2 +- .github/workflows/vector_db_tests.yml | 6 ++-- .github/workflows/weighted_edges_tests.yml | 12 ++++---- 15 files changed, 76 insertions(+), 83 deletions(-) diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml index 8a3bbb7ef..e79619221 100644 --- a/.github/actions/cognee_setup/action.yml +++ b/.github/actions/cognee_setup/action.yml @@ -1,5 +1,5 @@ name: cognee-setup -description: "Sets up Python, installs Poetry, loads venv from cache, and installs dependencies for Cognee." +description: "Sets up Python, installs uv, and installs dependencies for Cognee." inputs: python-version: @@ -16,18 +16,11 @@ runs: with: python-version: ${{ inputs.python-version }} - - name: Install Poetry - shell: bash - run: | - python -m pip install --upgrade pip - pip install poetry - - - name: Rebuild Poetry lock file - shell: bash - run: | - rm poetry.lock - poetry lock + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true - name: Install dependencies shell: bash - run: poetry install --no-interaction -E api -E docs -E evals -E gemini -E codegraph -E ollama -E dev -E neo4j + run: uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra neo4j diff --git a/.github/workflows/basic_tests.yml b/.github/workflows/basic_tests.yml index 28c708ddb..2173b228e 100644 --- a/.github/workflows/basic_tests.yml +++ b/.github/workflows/basic_tests.yml @@ -98,7 +98,7 @@ jobs: python-version: ${{ inputs.python-version }} - name: Run Unit Tests - run: poetry run pytest cognee/tests/unit/ + run: 
uv run pytest cognee/tests/unit/ integration-tests: name: Run Integration Tests @@ -115,7 +115,7 @@ jobs: python-version: ${{ inputs.python-version }} - name: Run Integration Tests - run: poetry run pytest cognee/tests/integration/ + run: uv run pytest cognee/tests/integration/ simple-examples: name: Run Simple Examples @@ -144,7 +144,7 @@ jobs: python-version: ${{ inputs.python-version }} - name: Run Simple Examples - run: poetry run python ./examples/python/simple_example.py + run: uv run python ./examples/python/simple_example.py simple-examples-baml: name: Run Simple Examples BAML @@ -180,7 +180,7 @@ jobs: python-version: ${{ inputs.python-version }} - name: Run Simple Examples - run: poetry run python ./examples/python/simple_example.py + run: uv run python ./examples/python/simple_example.py graph-tests: name: Run Basic Graph Tests @@ -209,4 +209,4 @@ jobs: python-version: ${{ inputs.python-version }} - name: Run Graph Tests - run: poetry run python ./examples/python/code_graph_example.py --repo_path ./cognee/tasks/graph + run: uv run python ./examples/python/code_graph_example.py --repo_path ./cognee/tasks/graph diff --git a/.github/workflows/db_examples_tests.yml b/.github/workflows/db_examples_tests.yml index f585e8200..552395e6a 100644 --- a/.github/workflows/db_examples_tests.yml +++ b/.github/workflows/db_examples_tests.yml @@ -56,7 +56,7 @@ jobs: - name: Install Neo4j extra run: | - poetry install -E neo4j + uv sync --extra neo4j - name: Run Neo4j Example env: @@ -74,7 +74,7 @@ jobs: GRAPH_DATABASE_USERNAME: "neo4j" GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }} run: | - poetry run python examples/database_examples/neo4j_example.py + uv run python examples/database_examples/neo4j_example.py run-db-example-kuzu: name: "Kuzu DB Example Test" @@ -92,7 +92,7 @@ jobs: - name: Install Kuzu extra run: | - poetry install + uv sync - name: Run Kuzu Example env: @@ -107,7 +107,7 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} 
GRAPH_DATABASE_PROVIDER: "kuzu" run: | - poetry run python examples/database_examples/kuzu_example.py + uv run python examples/database_examples/kuzu_example.py run-db-example-pgvector: name: "PostgreSQL PGVector DB Example Test" @@ -141,7 +141,7 @@ jobs: - name: Install PGVector extra run: | - poetry install -E postgres + uv sync --extra postgres - name: Run PGVector Example env: @@ -155,4 +155,4 @@ jobs: EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: | - poetry run python examples/database_examples/pgvector_example.py + uv run python examples/database_examples/pgvector_example.py diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 99eb2a72c..4f7c67314 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -57,7 +57,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_cognee_server_start.py + run: uv run python ./cognee/tests/test_cognee_server_start.py run-telemetry-test: name: Run Telemetry Test @@ -81,7 +81,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_telemetry.py + run: uv run python ./cognee/tests/test_telemetry.py run-telemetry-pipeline-test: name: Run Telemetry Pipeline Test @@ -110,7 +110,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_library.py + run: uv run python ./cognee/tests/test_library.py run-deduplication-test: name: Deduplication Test @@ -143,7 +143,7 @@ jobs: - name: Install specific db dependency run: 
| - poetry install -E postgres + uv sync --extra postgres - name: Run Deduplication Example env: @@ -154,7 +154,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_deduplication.py + run: uv run python ./cognee/tests/test_deduplication.py run-deletion-test: name: Deletion Test @@ -179,7 +179,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_deletion.py + run: uv run python ./cognee/tests/test_deletion.py run-s3-bucket-test: name: S3 Bucket Test @@ -195,7 +195,7 @@ jobs: - name: Install specific S3 dependency run: | - poetry install -E aws + uv sync --extra aws - name: Run S3 Bucket Test env: @@ -210,7 +210,7 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - run: poetry run python ./cognee/tests/test_s3.py + run: uv run python ./cognee/tests/test_s3.py test-parallel-databases: name: Test using different async databases in parallel in Cognee @@ -226,7 +226,7 @@ jobs: - name: Install specific graph db dependency run: | - poetry install + uv sync - name: Run parallel databases test env: @@ -239,7 +239,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_parallel_databases.py + run: uv run python ./cognee/tests/test_parallel_databases.py test-permissions: name: Test permissions with different situations in Cognee @@ -255,7 +255,7 @@ jobs: - name: Install specific graph db dependency run: | - poetry install + uv sync - name: Run parallel 
databases test env: @@ -268,7 +268,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_permissions.py + run: uv run python ./cognee/tests/test_permissions.py test-graph-edges: name: Test graph edge ingestion @@ -284,7 +284,7 @@ jobs: - name: Install specific graph db dependency run: | - poetry install + uv sync - name: Run graph edges test env: @@ -297,4 +297,4 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_edge_ingestion.py + run: uv run python ./cognee/tests/test_edge_ingestion.py diff --git a/.github/workflows/examples_tests.yml b/.github/workflows/examples_tests.yml index 796faf2e0..f0ae430ec 100644 --- a/.github/workflows/examples_tests.yml +++ b/.github/workflows/examples_tests.yml @@ -20,7 +20,7 @@ jobs: env: LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: poetry run python ./examples/python/multimedia_example.py + run: uv run python ./examples/python/multimedia_example.py test-eval-example: name: Run Eval Example @@ -36,7 +36,7 @@ jobs: - name: Install specific eval dependency run: | - poetry install -E deepeval + uv sync --extra deepeval - name: Run Evaluation Framework Example env: @@ -49,7 +49,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/eval_framework/run_eval.py + run: uv run python ./cognee/eval_framework/run_eval.py test-descriptive-metrics: name: Run Descriptive Metrics Example @@ -74,7 +74,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY 
}} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py + run: uv run python ./cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py test-dynamic-steps-metrics: @@ -100,4 +100,4 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./examples/python/dynamic_steps_example.py + run: uv run python ./examples/python/dynamic_steps_example.py diff --git a/.github/workflows/graph_db_tests.yml b/.github/workflows/graph_db_tests.yml index 379539e14..91a611168 100644 --- a/.github/workflows/graph_db_tests.yml +++ b/.github/workflows/graph_db_tests.yml @@ -30,7 +30,7 @@ jobs: - name: Install specific db dependency run: | - poetry install + uv sync - name: Run Kuzu Tests env: @@ -43,7 +43,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_kuzu.py + run: uv run python ./cognee/tests/test_kuzu.py - name: Run Weighted Edges Tests with Kuzu env: @@ -57,7 +57,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v + run: uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v run-neo4j-tests: name: Neo4j Tests @@ -74,7 +74,7 @@ jobs: - name: Install specific db dependency run: | - poetry install -E neo4j + uv sync --extra neo4j - name: Run default Neo4j env: @@ -91,7 +91,7 @@ jobs: GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }} GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }} GRAPH_DATABASE_USERNAME: "neo4j" - run: poetry run python 
./cognee/tests/test_neo4j.py + run: uv run python ./cognee/tests/test_neo4j.py - name: Run Weighted Edges Tests with Neo4j env: @@ -108,4 +108,4 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v + run: uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v diff --git a/.github/workflows/python_version_tests.yml b/.github/workflows/python_version_tests.yml index 6db67d115..d7a7da050 100644 --- a/.github/workflows/python_version_tests.yml +++ b/.github/workflows/python_version_tests.yml @@ -55,7 +55,7 @@ jobs: - name: Run unit tests shell: bash - run: poetry run pytest cognee/tests/unit/ + run: uv run pytest cognee/tests/unit/ env: PYTHONUTF8: 1 LLM_PROVIDER: openai @@ -73,7 +73,7 @@ jobs: - name: Run integration tests if: ${{ !contains(matrix.os, 'windows') }} shell: bash - run: poetry run pytest cognee/tests/integration/ + run: uv run pytest cognee/tests/integration/ env: PYTHONUTF8: 1 LLM_PROVIDER: openai @@ -103,11 +103,11 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_library.py + run: uv run python ./cognee/tests/test_library.py - - name: Build with Poetry + - name: Build with uv shell: bash - run: poetry build + run: uv build - name: Install Package if: ${{ !contains(matrix.os, 'windows') }} diff --git a/.github/workflows/relational_db_migration_tests.yml b/.github/workflows/relational_db_migration_tests.yml index e44d54d02..604964512 100644 --- a/.github/workflows/relational_db_migration_tests.yml +++ b/.github/workflows/relational_db_migration_tests.yml @@ -61,7 +61,7 @@ jobs: - name: Install specific db dependency run: | - poetry install -E postgres + uv sync 
--extra postgres - name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql) env: @@ -90,7 +90,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_relational_db_migration.py + run: uv run python ./cognee/tests/test_relational_db_migration.py run-relational-db-migration-test-kuzu: name: Kuzu Relational DB Migration Test @@ -123,7 +123,7 @@ jobs: - name: Install specific db dependency run: | - poetry install -E postgres + uv sync --extra postgres - name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql) env: @@ -154,7 +154,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_relational_db_migration.py + run: uv run python ./cognee/tests/test_relational_db_migration.py run-relational-db-migration-test-neo4j: name: Neo4j Relational DB Migration Test @@ -187,7 +187,7 @@ jobs: - name: Install specific db dependency run: | - poetry install -E postgres -E neo4j + uv sync --extra postgres --extra neo4j - name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql) env: @@ -221,4 +221,4 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_relational_db_migration.py + run: uv run python ./cognee/tests/test_relational_db_migration.py diff --git a/.github/workflows/reusable_notebook.yml b/.github/workflows/reusable_notebook.yml index 4f97476ef..7ee7f61f3 100644 --- a/.github/workflows/reusable_notebook.yml +++ b/.github/workflows/reusable_notebook.yml @@ -49,7 +49,7 @@ jobs: - name: Install specific db dependency run: | - poetry install 
-E notebook + uv sync --extra notebook - name: Execute Jupyter Notebook env: @@ -64,7 +64,7 @@ jobs: EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: | - poetry run jupyter nbconvert \ + uv run jupyter nbconvert \ --to notebook \ --execute ${{ inputs.notebook-location }} \ --output executed_notebook.ipynb \ diff --git a/.github/workflows/search_db_tests.yml b/.github/workflows/search_db_tests.yml index 3f4bbf1ca..a2ab00754 100644 --- a/.github/workflows/search_db_tests.yml +++ b/.github/workflows/search_db_tests.yml @@ -30,7 +30,7 @@ jobs: - name: Install specific db dependency run: | - poetry install + uv sync - name: Run Kuzu search Tests env: @@ -46,7 +46,7 @@ jobs: GRAPH_DATABASE_PROVIDER: 'kuzu' VECTOR_DB_PROVIDER: 'lancedb' DB_PROVIDER: 'sqlite' - run: poetry run python ./cognee/tests/test_search_db.py + run: uv run python ./cognee/tests/test_search_db.py run-neo4j-lance-sqlite-search-tests: name: Search test for Neo4j/LanceDB/Sqlite @@ -80,7 +80,7 @@ jobs: - name: Install specific db dependency run: | - poetry install -E neo4j + uv sync --extra neo4j - name: Run Neo4j search Tests env: @@ -99,7 +99,7 @@ jobs: GRAPH_DATABASE_URL: bolt://localhost:7687 GRAPH_DATABASE_USERNAME: neo4j GRAPH_DATABASE_PASSWORD: pleaseletmein - run: poetry run python ./cognee/tests/test_search_db.py + run: uv run python ./cognee/tests/test_search_db.py run-kuzu-pgvector-postgres-search-tests: name: Search test for Kuzu/PGVector/Postgres @@ -131,7 +131,7 @@ jobs: python-version: ${{ inputs.python-version }} - name: Install dependencies - run: poetry install -E postgres + run: uv sync --extra postgres - name: Run Kuzu/PGVector/Postgres Tests env: @@ -152,7 +152,7 @@ jobs: DB_PORT: 5432 DB_USERNAME: cognee DB_PASSWORD: cognee - run: poetry run python ./cognee/tests/test_search_db.py + run: uv run python ./cognee/tests/test_search_db.py run-neo4j-pgvector-postgres-search-tests: name: Search test for 
Neo4j/PGVector/Postgres @@ -198,7 +198,7 @@ jobs: - name: Install dependencies run: | - poetry install -E neo4j -E postgres + uv sync --extra neo4j --extra postgres - name: Run Neo4j + PGVector + Postgres search Tests env: @@ -222,4 +222,4 @@ jobs: DB_PORT: 5432 DB_USERNAME: cognee DB_PASSWORD: cognee - run: poetry run python ./cognee/tests/test_search_db.py + run: uv run python ./cognee/tests/test_search_db.py diff --git a/.github/workflows/test_gemini.yml b/.github/workflows/test_gemini.yml index 41fd954a2..544e15a5e 100644 --- a/.github/workflows/test_gemini.yml +++ b/.github/workflows/test_gemini.yml @@ -26,4 +26,4 @@ jobs: EMBEDDING_MODEL: "gemini/text-embedding-004" EMBEDDING_DIMENSIONS: "768" EMBEDDING_MAX_TOKENS: "8076" - run: poetry run python ./examples/python/simple_example.py + run: uv run python ./examples/python/simple_example.py diff --git a/.github/workflows/test_ollama.yml b/.github/workflows/test_ollama.yml index 8f85efadc..fbd687319 100644 --- a/.github/workflows/test_ollama.yml +++ b/.github/workflows/test_ollama.yml @@ -26,7 +26,7 @@ jobs: - name: Install torch dependency run: | - poetry add torch + uv add torch # - name: Install ollama # run: curl -fsSL https://ollama.com/install.sh | sh @@ -101,4 +101,4 @@ jobs: EMBEDDING_ENDPOINT: "http://localhost:11434/api/embeddings" EMBEDDING_DIMENSIONS: "4096" HUGGINGFACE_TOKENIZER: "Salesforce/SFR-Embedding-Mistral" - run: poetry run python ./examples/python/simple_example.py + run: uv run python ./examples/python/simple_example.py diff --git a/.github/workflows/test_s3_file_storage.yml b/.github/workflows/test_s3_file_storage.yml index 8035a9bc3..c4866ec2d 100644 --- a/.github/workflows/test_s3_file_storage.yml +++ b/.github/workflows/test_s3_file_storage.yml @@ -36,4 +36,4 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python 
./cognee/tests/test_s3_file_storage.py + run: uv run python ./cognee/tests/test_s3_file_storage.py diff --git a/.github/workflows/vector_db_tests.yml b/.github/workflows/vector_db_tests.yml index 00ce21cbf..faa570868 100644 --- a/.github/workflows/vector_db_tests.yml +++ b/.github/workflows/vector_db_tests.yml @@ -56,7 +56,7 @@ jobs: # EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} # EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} # EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - # run: poetry run python ./cognee/tests/test_chromadb.py + # run: uv run python ./cognee/tests/test_chromadb.py run-postgres-tests: @@ -90,7 +90,7 @@ jobs: - name: Install specific db dependency run: | - poetry install -E postgres + uv sync --extra postgres - name: Run PGVector Tests env: @@ -103,4 +103,4 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: poetry run python ./cognee/tests/test_pgvector.py + run: uv run python ./cognee/tests/test_pgvector.py diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml index d33bd1ceb..6d33082a9 100644 --- a/.github/workflows/weighted_edges_tests.yml +++ b/.github/workflows/weighted_edges_tests.yml @@ -47,11 +47,11 @@ jobs: - name: Run Weighted Edges Unit Tests run: | - poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short + uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short - name: Run Standard Graph Tests (Regression) run: | - poetry run pytest cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py -v --tb=short + uv run pytest cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py -v --tb=short test-with-different-databases: name: Test Weighted Edges with Different Graph Databases @@ -64,7 +64,7 @@ jobs: install_extra: "" graph_db_provider: "kuzu" - 
database: neo4j - install_extra: "-E neo4j" + install_extra: "--extra neo4j" graph_db_provider: "neo4j" env: LLM_PROVIDER: openai @@ -90,13 +90,13 @@ jobs: - name: Install Database Dependencies run: | - poetry install ${{ matrix.install_extra }} + uv sync ${{ matrix.install_extra }} - name: Run Weighted Edges Tests env: GRAPH_DATABASE_PROVIDER: ${{ matrix.graph_db_provider }} run: | - poetry run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short + uv run pytest cognee/tests/unit/interfaces/graph/test_weighted_edges.py -v --tb=short test-examples: name: Test Weighted Edges Examples @@ -125,7 +125,7 @@ jobs: - name: Test Weighted Edges Example run: | - poetry run python examples/python/weighted_edges_example.py + uv run python examples/python/weighted_edges_example.py - name: Verify Visualization File Created run: | From cb5590b655677311b6c64af8b8fc4f5999d711b8 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Thu, 14 Aug 2025 15:16:03 +0100 Subject: [PATCH 36/51] Update installation instructions and replace Poetry with uv across documentation and scripts --- README.md | 4 ++-- cognee-gui.py | 8 ++++---- cognee-mcp/README.md | 2 +- deployment/helm/Dockerfile | 17 ++++++----------- deployment/setup_ubuntu_instance.sh | 4 ++-- distributed/Dockerfile | 9 +++------ tools/check-lockfile.py | 4 ++-- 7 files changed, 20 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 194ede720..8f160727e 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ Your contributions are at the core of making this a true open source project. An ## 📦 Installation -You can install Cognee using either **pip**, **poetry**, **uv** or any other python package manager. +You can install Cognee using either **uv**, **pip**, **poetry** or any other python package manager. 
Cognee supports Python 3.8 to 3.12 ### With pip @@ -102,7 +102,7 @@ pip install cognee ## Local Cognee installation -You can install the local Cognee repo using **pip**, **poetry** and **uv**. +You can install the local Cognee repo using **uv**, **pip** and **poetry**. For local pip installation please make sure your pip version is above version 21.3. ### with UV with all optional dependencies diff --git a/cognee-gui.py b/cognee-gui.py index e62a08380..d08d31529 100644 --- a/cognee-gui.py +++ b/cognee-gui.py @@ -23,10 +23,10 @@ except ImportError as e: print( "\nPlease install Cognee with optional gui dependencies or manually install missing dependencies.\n" ) - print("\nTo install with poetry use:") - print("\npoetry install -E gui\n") - print("\nOr to install with poetry and all dependencies use:") - print("\npoetry install --all-extras\n") + print("\nTo install with uv use:") + print("\nuv sync --extra gui\n") + print("\nOr to install with uv and all dependencies use:") + print("\nuv sync --all-extras\n") print("\nTo install with pip use: ") print('\npip install ".[gui]"\n') raise e diff --git a/cognee-mcp/README.md b/cognee-mcp/README.md index ffd46dd6e..ecb7a0f4b 100644 --- a/cognee-mcp/README.md +++ b/cognee-mcp/README.md @@ -211,7 +211,7 @@ Open inspector with timeout passed: To apply new changes while developing cognee you need to do: -1. `poetry lock` in cognee folder +1. `uv lock` in cognee folder 2. `uv sync --dev --all-extras --reinstall` 3. 
`mcp dev src/server.py` diff --git a/deployment/helm/Dockerfile b/deployment/helm/Dockerfile index 3f9ec7740..e6cdc81f0 100644 --- a/deployment/helm/Dockerfile +++ b/deployment/helm/Dockerfile @@ -1,7 +1,7 @@ FROM python:3.11-slim -# Define Poetry extras to install -ARG POETRY_EXTRAS="\ +# Define uv extras to install +ARG UV_EXTRAS="\ # Storage & Databases \ postgres neo4j falkordb kuzu \ # Notebooks & Interactive Environments \ @@ -21,7 +21,6 @@ ARG DEBUG # Set environment variable based on the build argument ENV DEBUG=${DEBUG} ENV PIP_NO_CACHE_DIR=true -ENV PATH="${PATH}:/root/.poetry/bin" RUN apt-get install -y \ @@ -30,16 +29,12 @@ RUN apt-get install -y \ WORKDIR /app -COPY pyproject.toml poetry.lock /app/ +COPY pyproject.toml uv.lock /app/ +RUN pip install uv -RUN pip install poetry - -# Don't create virtualenv since docker is already isolated -RUN poetry config virtualenvs.create false - -# Install the dependencies -RUN poetry install --extras "${POETRY_EXTRAS}" --no-root --without dev +# Install the dependencies with uv +RUN uv sync --no-dev # Set the PYTHONPATH environment variable to include the /app directory diff --git a/deployment/setup_ubuntu_instance.sh b/deployment/setup_ubuntu_instance.sh index 854cd1c9f..c54e8f3f2 100644 --- a/deployment/setup_ubuntu_instance.sh +++ b/deployment/setup_ubuntu_instance.sh @@ -26,5 +26,5 @@ sudo apt install -y python3.12 virtualenv venv --python=python3.12 source venv/bin/activate -pip install poetry -poetry install +pip install uv +uv sync diff --git a/distributed/Dockerfile b/distributed/Dockerfile index 6ac818d45..6a5fb0ccb 100644 --- a/distributed/Dockerfile +++ b/distributed/Dockerfile @@ -2,7 +2,6 @@ FROM python:3.11-slim # Set environment variables ENV PIP_NO_CACHE_DIR=true -ENV PATH="${PATH}:/root/.poetry/bin" ENV PYTHONPATH=/app ENV RUN_MODE=modal ENV SKIP_MIGRATIONS=true @@ -19,13 +18,11 @@ RUN apt-get update && apt-get install -y \ WORKDIR /app -COPY pyproject.toml poetry.lock README.md /app/ +COPY 
pyproject.toml uv.lock README.md /app/ -RUN pip install poetry +RUN pip install uv -RUN poetry config virtualenvs.create false - -RUN poetry install --extras neo4j --extras postgres --extras aws --extras distributed --no-root +RUN uv sync --extra neo4j --extra postgres --extra aws --extra distributed --no-dev COPY cognee/ /app/cognee COPY distributed/ /app/distributed diff --git a/tools/check-lockfile.py b/tools/check-lockfile.py index df60b35ac..39bea1165 100644 --- a/tools/check-lockfile.py +++ b/tools/check-lockfile.py @@ -1,7 +1,7 @@ import sys # File and string to search for -lockfile_name = "poetry.lock" +lockfile_name = "uv.lock" hash_string = "hash = " threshold = 100 @@ -19,7 +19,7 @@ try: # If the loop completes without early exit, it means the threshold was not reached print( - f"Error: The string '{hash_string}' appears less than {threshold} times in {lockfile_name}, please make sure you are using an up to date poetry version." + f"Error: The string '{hash_string}' appears less than {threshold} times in {lockfile_name}, please make sure you are using an up to date uv version." ) sys.exit(1) From a1edfc98c2025beca85af87439e7cf703faf85da Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Thu, 14 Aug 2025 15:17:00 +0100 Subject: [PATCH 37/51] docs: update README.md with new supported python versions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8f160727e..6f4cac325 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ Your contributions are at the core of making this a true open source project. An ## 📦 Installation You can install Cognee using either **uv**, **pip**, **poetry** or any other python package manager. 
-Cognee supports Python 3.8 to 3.12 +Cognee supports Python 3.10 to 3.13 ### With pip From 1ab332828f61b5da6313f3dd7b9c4e92bf7e8df7 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Thu, 14 Aug 2025 15:43:44 +0100 Subject: [PATCH 38/51] fix: uv uninstalls rest of packages in some workflows --- .github/actions/cognee_setup/action.yml | 14 +++++++++++++- .github/workflows/db_examples_tests.yml | 9 +-------- .github/workflows/e2e_tests.yml | 11 ++++------- .github/workflows/examples_tests.yml | 5 +---- .github/workflows/graph_db_tests.yml | 4 ---- .../workflows/relational_db_migration_tests.yml | 12 ++++++------ .github/workflows/reusable_notebook.yml | 5 +---- .github/workflows/search_db_tests.yml | 16 ++++++++-------- .github/workflows/vector_db_tests.yml | 5 +---- 9 files changed, 35 insertions(+), 46 deletions(-) diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml index e79619221..869582585 100644 --- a/.github/actions/cognee_setup/action.yml +++ b/.github/actions/cognee_setup/action.yml @@ -6,6 +6,10 @@ inputs: description: "Which Python version to use" required: false default: "3.11.x" + extra-dependencies: + description: "Additional extra dependencies to install (space-separated)" + required: false + default: "" runs: using: "composite" @@ -23,4 +27,12 @@ runs: - name: Install dependencies shell: bash - run: uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra neo4j + run: | + EXTRA_ARGS="" + if [ -n "${{ inputs.extra-dependencies }}" ]; then + IFS=' ' read -r -a deps <<< "${{ inputs.extra-dependencies }}" + for extra in "${deps[@]}"; do + EXTRA_ARGS="$EXTRA_ARGS --extra $extra" + done + fi + uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS diff --git a/.github/workflows/db_examples_tests.yml b/.github/workflows/db_examples_tests.yml index 552395e6a..ae61d2ab0 100644 --- 
a/.github/workflows/db_examples_tests.yml +++ b/.github/workflows/db_examples_tests.yml @@ -54,10 +54,6 @@ jobs: with: python-version: ${{ inputs.python-version }} - - name: Install Neo4j extra - run: | - uv sync --extra neo4j - - name: Run Neo4j Example env: ENV: dev @@ -138,10 +134,7 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: ${{ inputs.python-version }} - - - name: Install PGVector extra - run: | - uv sync --extra postgres + extra-dependencies: "postgres" - name: Run PGVector Example env: diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 4f7c67314..8caeca5c3 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -140,10 +140,7 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' - - - name: Install specific db dependency - run: | - uv sync --extra postgres + extra-dependencies: "postgres" - name: Run Deduplication Example env: @@ -192,10 +189,10 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' + extra-dependencies: "aws" - - name: Install specific S3 dependency - run: | - uv sync --extra aws + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run S3 Bucket Test env: diff --git a/.github/workflows/examples_tests.yml b/.github/workflows/examples_tests.yml index f0ae430ec..277ed8c21 100644 --- a/.github/workflows/examples_tests.yml +++ b/.github/workflows/examples_tests.yml @@ -33,10 +33,7 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' - - - name: Install specific eval dependency - run: | - uv sync --extra deepeval + extra-dependencies: "deepeval" - name: Run Evaluation Framework Example env: diff --git a/.github/workflows/graph_db_tests.yml b/.github/workflows/graph_db_tests.yml index 91a611168..fe8fec4f0 100644 --- a/.github/workflows/graph_db_tests.yml +++ b/.github/workflows/graph_db_tests.yml @@ -72,10 +72,6 @@ jobs: with: python-version: 
${{ inputs.python-version }} - - name: Install specific db dependency - run: | - uv sync --extra neo4j - - name: Run default Neo4j env: ENV: 'dev' diff --git a/.github/workflows/relational_db_migration_tests.yml b/.github/workflows/relational_db_migration_tests.yml index 604964512..26fd7e150 100644 --- a/.github/workflows/relational_db_migration_tests.yml +++ b/.github/workflows/relational_db_migration_tests.yml @@ -58,10 +58,10 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' + extra-dependencies: "postgres" - name: Install specific db dependency - run: | - uv sync --extra postgres + run: echo "Dependencies already installed in setup" - name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql) env: @@ -120,10 +120,10 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' + extra-dependencies: "postgres" - name: Install specific db dependency - run: | - uv sync --extra postgres + run: echo "Dependencies already installed in setup" - name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql) env: @@ -184,10 +184,10 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' + extra-dependencies: "postgres" - name: Install specific db dependency - run: | - uv sync --extra postgres --extra neo4j + run: echo "Dependencies already installed in setup" - name: Run PostgreSQL Script to create test data (Chinook_PostgreSql.sql) env: diff --git a/.github/workflows/reusable_notebook.yml b/.github/workflows/reusable_notebook.yml index 7ee7f61f3..8eaf7ce3a 100644 --- a/.github/workflows/reusable_notebook.yml +++ b/.github/workflows/reusable_notebook.yml @@ -46,10 +46,7 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: ${{ inputs.python-version }} - - - name: Install specific db dependency - run: | - uv sync --extra notebook + extra-dependencies: "notebook" - name: Execute Jupyter Notebook env: diff --git a/.github/workflows/search_db_tests.yml 
b/.github/workflows/search_db_tests.yml index a2ab00754..c468bd1c6 100644 --- a/.github/workflows/search_db_tests.yml +++ b/.github/workflows/search_db_tests.yml @@ -78,9 +78,8 @@ jobs: with: python-version: ${{ inputs.python-version }} - - name: Install specific db dependency - run: | - uv sync --extra neo4j + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run Neo4j search Tests env: @@ -129,9 +128,10 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: ${{ inputs.python-version }} + extra-dependencies: "postgres" - - name: Install dependencies - run: uv sync --extra postgres + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run Kuzu/PGVector/Postgres Tests env: @@ -195,10 +195,10 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: ${{ inputs.python-version }} + extra-dependencies: "postgres" - - name: Install dependencies - run: | - uv sync --extra neo4j --extra postgres + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run Neo4j + PGVector + Postgres search Tests env: diff --git a/.github/workflows/vector_db_tests.yml b/.github/workflows/vector_db_tests.yml index faa570868..a82194db3 100644 --- a/.github/workflows/vector_db_tests.yml +++ b/.github/workflows/vector_db_tests.yml @@ -87,10 +87,7 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: ${{ inputs.python-version }} - - - name: Install specific db dependency - run: | - uv sync --extra postgres + extra-dependencies: "postgres" - name: Run PGVector Tests env: From bcdbadc4686148d4e7fd598df36bad03ae4327e2 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Thu, 14 Aug 2025 16:03:16 +0100 Subject: [PATCH 39/51] fix: unintentionally uninstall required deps when "uv sync" --- .github/workflows/db_examples_tests.yml | 5 ++--- .github/workflows/e2e_tests.yml | 15 ++++++--------- 
.github/workflows/graph_db_tests.yml | 5 ++--- .github/workflows/search_db_tests.yml | 5 ++--- .github/workflows/weighted_edges_tests.yml | 7 ++----- 5 files changed, 14 insertions(+), 23 deletions(-) diff --git a/.github/workflows/db_examples_tests.yml b/.github/workflows/db_examples_tests.yml index ae61d2ab0..a1a81b4d4 100644 --- a/.github/workflows/db_examples_tests.yml +++ b/.github/workflows/db_examples_tests.yml @@ -86,9 +86,8 @@ jobs: with: python-version: ${{ inputs.python-version }} - - name: Install Kuzu extra - run: | - uv sync + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run Kuzu Example env: diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 8caeca5c3..54a7cf98c 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -221,9 +221,8 @@ jobs: with: python-version: '3.11.x' - - name: Install specific graph db dependency - run: | - uv sync + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run parallel databases test env: @@ -250,9 +249,8 @@ jobs: with: python-version: '3.11.x' - - name: Install specific graph db dependency - run: | - uv sync + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run parallel databases test env: @@ -279,9 +277,8 @@ jobs: with: python-version: '3.11.x' - - name: Install specific graph db dependency - run: | - uv sync + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run graph edges test env: diff --git a/.github/workflows/graph_db_tests.yml b/.github/workflows/graph_db_tests.yml index fe8fec4f0..e7e5a11b8 100644 --- a/.github/workflows/graph_db_tests.yml +++ b/.github/workflows/graph_db_tests.yml @@ -28,9 +28,8 @@ jobs: with: python-version: ${{ inputs.python-version }} - - name: Install specific db dependency - run: | - uv sync + - name: Dependencies 
already installed + run: echo "Dependencies already installed in setup" - name: Run Kuzu Tests env: diff --git a/.github/workflows/search_db_tests.yml b/.github/workflows/search_db_tests.yml index c468bd1c6..0b749526d 100644 --- a/.github/workflows/search_db_tests.yml +++ b/.github/workflows/search_db_tests.yml @@ -28,9 +28,8 @@ jobs: with: python-version: ${{ inputs.python-version }} - - name: Install specific db dependency - run: | - uv sync + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run Kuzu search Tests env: diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml index 6d33082a9..555660caa 100644 --- a/.github/workflows/weighted_edges_tests.yml +++ b/.github/workflows/weighted_edges_tests.yml @@ -61,10 +61,8 @@ jobs: database: ['kuzu', 'neo4j'] include: - database: kuzu - install_extra: "" graph_db_provider: "kuzu" - database: neo4j - install_extra: "--extra neo4j" graph_db_provider: "neo4j" env: LLM_PROVIDER: openai @@ -88,9 +86,8 @@ jobs: with: python-version: '3.11' - - name: Install Database Dependencies - run: | - uv sync ${{ matrix.install_extra }} + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" - name: Run Weighted Edges Tests env: From 3941e469e5132aa89d4fb8d09e7a7bb539feb878 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 15 Aug 2025 09:41:40 +0100 Subject: [PATCH 40/51] chore: add step to rebuild uv lockfile in CI workflow --- .github/actions/cognee_setup/action.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml index 869582585..9ae669b53 100644 --- a/.github/actions/cognee_setup/action.yml +++ b/.github/actions/cognee_setup/action.yml @@ -24,6 +24,12 @@ runs: uses: astral-sh/setup-uv@v4 with: enable-cache: true + + - name: Rebuild uv lockfile + shell: bash + run: | + rm uv.lock + uv lock - name: 
Install dependencies shell: bash From 4312508cf3f0c78eccd435d949d4c58af1d7e186 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 15 Aug 2025 09:45:25 +0100 Subject: [PATCH 41/51] undo: keep poetry-uv change limited to CI/CD for now --- cognee-gui.py | 8 ++++---- cognee-mcp/README.md | 2 +- deployment/helm/Dockerfile | 17 +++++++++++------ deployment/setup_ubuntu_instance.sh | 4 ++-- distributed/Dockerfile | 9 ++++++--- tools/check-lockfile.py | 4 ++-- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/cognee-gui.py b/cognee-gui.py index d08d31529..e62a08380 100644 --- a/cognee-gui.py +++ b/cognee-gui.py @@ -23,10 +23,10 @@ except ImportError as e: print( "\nPlease install Cognee with optional gui dependencies or manually install missing dependencies.\n" ) - print("\nTo install with uv use:") - print("\nuv sync --extra gui\n") - print("\nOr to install with uv and all dependencies use:") - print("\nuv sync --all-extras\n") + print("\nTo install with poetry use:") + print("\npoetry install -E gui\n") + print("\nOr to install with poetry and all dependencies use:") + print("\npoetry install --all-extras\n") print("\nTo install with pip use: ") print('\npip install ".[gui]"\n') raise e diff --git a/cognee-mcp/README.md b/cognee-mcp/README.md index ecb7a0f4b..ffd46dd6e 100644 --- a/cognee-mcp/README.md +++ b/cognee-mcp/README.md @@ -211,7 +211,7 @@ Open inspector with timeout passed: To apply new changes while developing cognee you need to do: -1. `uv lock` in cognee folder +1. `poetry lock` in cognee folder 2. `uv sync --dev --all-extras --reinstall` 3. 
`mcp dev src/server.py` diff --git a/deployment/helm/Dockerfile b/deployment/helm/Dockerfile index e6cdc81f0..3f9ec7740 100644 --- a/deployment/helm/Dockerfile +++ b/deployment/helm/Dockerfile @@ -1,7 +1,7 @@ FROM python:3.11-slim -# Define uv extras to install -ARG UV_EXTRAS="\ +# Define Poetry extras to install +ARG POETRY_EXTRAS="\ # Storage & Databases \ postgres neo4j falkordb kuzu \ # Notebooks & Interactive Environments \ @@ -21,6 +21,7 @@ ARG DEBUG # Set environment variable based on the build argument ENV DEBUG=${DEBUG} ENV PIP_NO_CACHE_DIR=true +ENV PATH="${PATH}:/root/.poetry/bin" RUN apt-get install -y \ @@ -29,12 +30,16 @@ RUN apt-get install -y \ WORKDIR /app -COPY pyproject.toml uv.lock /app/ +COPY pyproject.toml poetry.lock /app/ -RUN pip install uv -# Install the dependencies with uv -RUN uv sync --no-dev +RUN pip install poetry + +# Don't create virtualenv since docker is already isolated +RUN poetry config virtualenvs.create false + +# Install the dependencies +RUN poetry install --extras "${POETRY_EXTRAS}" --no-root --without dev # Set the PYTHONPATH environment variable to include the /app directory diff --git a/deployment/setup_ubuntu_instance.sh b/deployment/setup_ubuntu_instance.sh index c54e8f3f2..854cd1c9f 100644 --- a/deployment/setup_ubuntu_instance.sh +++ b/deployment/setup_ubuntu_instance.sh @@ -26,5 +26,5 @@ sudo apt install -y python3.12 virtualenv venv --python=python3.12 source venv/bin/activate -pip install uv -uv sync +pip install poetry +poetry install diff --git a/distributed/Dockerfile b/distributed/Dockerfile index 6a5fb0ccb..6ac818d45 100644 --- a/distributed/Dockerfile +++ b/distributed/Dockerfile @@ -2,6 +2,7 @@ FROM python:3.11-slim # Set environment variables ENV PIP_NO_CACHE_DIR=true +ENV PATH="${PATH}:/root/.poetry/bin" ENV PYTHONPATH=/app ENV RUN_MODE=modal ENV SKIP_MIGRATIONS=true @@ -18,11 +19,13 @@ RUN apt-get update && apt-get install -y \ WORKDIR /app -COPY pyproject.toml uv.lock README.md /app/ +COPY 
pyproject.toml poetry.lock README.md /app/ -RUN pip install uv +RUN pip install poetry -RUN uv sync --extra neo4j --extra postgres --extra aws --extra distributed --no-dev +RUN poetry config virtualenvs.create false + +RUN poetry install --extras neo4j --extras postgres --extras aws --extras distributed --no-root COPY cognee/ /app/cognee COPY distributed/ /app/distributed diff --git a/tools/check-lockfile.py b/tools/check-lockfile.py index 39bea1165..df60b35ac 100644 --- a/tools/check-lockfile.py +++ b/tools/check-lockfile.py @@ -1,7 +1,7 @@ import sys # File and string to search for -lockfile_name = "uv.lock" +lockfile_name = "poetry.lock" hash_string = "hash = " threshold = 100 @@ -19,7 +19,7 @@ try: # If the loop completes without early exit, it means the threshold was not reached print( - f"Error: The string '{hash_string}' appears less than {threshold} times in {lockfile_name}, please make sure you are using an up to date uv version." + f"Error: The string '{hash_string}' appears less than {threshold} times in {lockfile_name}, please make sure you are using an up to date poetry version." ) sys.exit(1) From 81a17580a60eff668ed55a3b9d59596aa76e4b83 Mon Sep 17 00:00:00 2001 From: misselvexu Date: Wed, 13 Aug 2025 17:37:36 +0800 Subject: [PATCH 42/51] Remove NetworkX from cognee core. 
--- cognee/api/v1/add/add.py | 2 +- .../databases/graph/get_graph_engine.py | 12 +- .../databases/graph/networkx/__init__.py | 0 .../databases/graph/networkx/adapter.py | 1017 ----------------- .../retrieval/cypher_search_retriever.py | 10 +- .../retrieval/natural_language_retriever.py | 8 +- .../cognee_network_visualization.py | 31 +- cognee/shared/utils.py | 1 - pyproject.toml | 2 +- 9 files changed, 39 insertions(+), 1044 deletions(-) delete mode 100644 cognee/infrastructure/databases/graph/networkx/__init__.py delete mode 100644 cognee/infrastructure/databases/graph/networkx/adapter.py diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 7daaaf1dd..548689a1d 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -132,7 +132,7 @@ async def add( - DEFAULT_USER_EMAIL: Custom default user email - DEFAULT_USER_PASSWORD: Custom default user password - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector" - - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx" + - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j" Raises: FileNotFoundError: If specified file paths don't exist diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py index 37be4bc7b..229fb27c4 100644 --- a/cognee/infrastructure/databases/graph/get_graph_engine.py +++ b/cognee/infrastructure/databases/graph/get_graph_engine.py @@ -21,9 +21,6 @@ async def get_graph_engine() -> GraphDBInterface: if hasattr(graph_client, "initialize"): await graph_client.initialize() - # Handle loading of graph for NetworkX - if config["graph_database_provider"].lower() == "networkx" and graph_client.graph is None: - await graph_client.load_graph_from_file() return graph_client @@ -181,8 +178,7 @@ def create_graph_engine( graph_id=graph_identifier, ) - from .networkx.adapter import NetworkXAdapter - - graph_client = NetworkXAdapter(filename=graph_file_path) - - return graph_client + raise 
EnvironmentError( + f"Unsupported graph database provider: {graph_database_provider}. " + f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'falkordb', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}" + ) diff --git a/cognee/infrastructure/databases/graph/networkx/__init__.py b/cognee/infrastructure/databases/graph/networkx/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cognee/infrastructure/databases/graph/networkx/adapter.py b/cognee/infrastructure/databases/graph/networkx/adapter.py deleted file mode 100644 index da9b506a0..000000000 --- a/cognee/infrastructure/databases/graph/networkx/adapter.py +++ /dev/null @@ -1,1017 +0,0 @@ -"""Adapter for NetworkX graph database.""" - -import os -import json -import asyncio -import numpy as np -from uuid import UUID -import networkx as nx -from datetime import datetime, timezone -from typing import Dict, Any, List, Union, Type, Tuple - -from cognee.infrastructure.databases.exceptions.exceptions import NodesetFilterNotSupportedError -from cognee.infrastructure.files.storage import get_file_storage -from cognee.shared.logging_utils import get_logger -from cognee.infrastructure.databases.graph.graph_db_interface import ( - GraphDBInterface, - record_graph_changes, -) -from cognee.infrastructure.engine import DataPoint -from cognee.infrastructure.engine.utils import parse_id -from cognee.modules.storage.utils import JSONEncoder - -logger = get_logger() - - -class NetworkXAdapter(GraphDBInterface): - """ - Manage a singleton instance of a graph database interface, utilizing the NetworkX - library. Handles graph data access and manipulation, including nodes and edges - management, persistence, and auxiliary functionalities. 
- """ - - _instance = None - graph = None # Class variable to store the singleton instance - - def __new__(cls, filename): - if cls._instance is None: - cls._instance = super().__new__(cls) - cls._instance.filename = filename - return cls._instance - - def __init__(self, filename="cognee_graph.pkl"): - self.filename = filename - - async def get_graph_data(self): - """ - Retrieve graph data including nodes and edges. - - Returns: - -------- - - A tuple containing a list of node data and a list of edge data. - """ - await self.load_graph_from_file() - return (list(self.graph.nodes(data=True)), list(self.graph.edges(data=True, keys=True))) - - async def query(self, query: str, params: dict): - """ - Execute a query against the graph data. The specifics of the query execution need to be - implemented. - - Parameters: - ----------- - - - query (str): The query string to run against the graph. - - params (dict): Parameters for the query, if necessary. - """ - pass - - async def has_node(self, node_id: UUID) -> bool: - """ - Determine if a specific node exists in the graph. - - Parameters: - ----------- - - - node_id (UUID): The identifier of the node to check. - - Returns: - -------- - - - bool: True if the node exists, otherwise False. - """ - return self.graph.has_node(node_id) - - async def add_node(self, node: DataPoint) -> None: - """ - Add a node to the graph and persist the graph state to the file. - - Parameters: - ----------- - - - node (DataPoint): The node to be added, represented as a DataPoint object. - """ - self.graph.add_node(node.id, **node.model_dump()) - - await self.save_graph_to_file(self.filename) - - @record_graph_changes - async def add_nodes(self, nodes: list[DataPoint]) -> None: - """ - Bulk add multiple nodes to the graph and persist the graph state to the file. - - Parameters: - ----------- - - - nodes (list[DataPoint]): A list of DataPoint objects defining the nodes to be - added. 
- """ - nodes = [(node.id, node.model_dump()) for node in nodes] - self.graph.add_nodes_from(nodes) - await self.save_graph_to_file(self.filename) - - async def get_graph(self): - """ - Retrieve the current state of the graph. - - Returns: - -------- - - The current graph instance. - """ - return self.graph - - async def has_edge(self, from_node: str, to_node: str, edge_label: str) -> bool: - """ - Check for the existence of a specific edge in the graph. - - Parameters: - ----------- - - - from_node (str): The identifier of the source node. - - to_node (str): The identifier of the target node. - - edge_label (str): The label of the edge to check. - - Returns: - -------- - - - bool: True if the edge exists, otherwise False. - """ - return self.graph.has_edge(from_node, to_node, key=edge_label) - - async def has_edges(self, edges): - """ - Check for the existence of multiple edges in the graph. - - Parameters: - ----------- - - - edges: A list of edges to check, defined as tuples of (from_node, to_node, - edge_label). - - Returns: - -------- - - A list of edges that exist in the graph. - """ - result = [] - - for from_node, to_node, edge_label in edges: - if self.graph.has_edge(from_node, to_node, edge_label): - result.append((from_node, to_node, edge_label)) - - return result - - @record_graph_changes - async def add_edge( - self, - from_node: str, - to_node: str, - relationship_name: str, - edge_properties: Dict[str, Any] = {}, - ) -> None: - """ - Add a single edge to the graph and persist the graph state to the file. - - Parameters: - ----------- - - - from_node (str): The identifier of the source node for the edge. - - to_node (str): The identifier of the target node for the edge. - - relationship_name (str): The label for the relationship as the edge is created. - - edge_properties (Dict[str, Any]): Additional properties for the edge, if any. 
- (default {}) - """ - edge_properties["updated_at"] = datetime.now(timezone.utc) - self.graph.add_edge( - from_node, - to_node, - key=relationship_name, - **(edge_properties if edge_properties else {}), - ) - - await self.save_graph_to_file(self.filename) - - @record_graph_changes - async def add_edges(self, edges: list[tuple[str, str, str, dict]]) -> None: - """ - Bulk add multiple edges to the graph and persist the graph state to the file. - - Parameters: - ----------- - - - edges (list[tuple[str, str, str, dict]]): A list of edges defined as tuples - containing (from_node, to_node, relationship_name, edge_properties). - """ - if not edges: - logger.debug("No edges to add") - return - - try: - # Validate edge format and convert UUIDs to strings - processed_edges = [] - for edge in edges: - if len(edge) < 3 or len(edge) > 4: - raise ValueError( - f"Invalid edge format: {edge}. Expected (from_node, to_node, relationship_name[, properties])" - ) - - # Convert UUIDs to strings if needed - from_node = str(edge[0]) if isinstance(edge[0], UUID) else edge[0] - to_node = str(edge[1]) if isinstance(edge[1], UUID) else edge[1] - relationship_name = edge[2] - - if not all(isinstance(x, str) for x in [from_node, to_node, relationship_name]): - raise ValueError( - f"First three elements of edge must be strings or UUIDs: {edge}" - ) - - # Process edge with updated_at timestamp - processed_edge = ( - from_node, - to_node, - relationship_name, - { - **(edge[3] if len(edge) == 4 else {}), - "updated_at": datetime.now(timezone.utc), - }, - ) - processed_edges.append(processed_edge) - - # Add edges to graph - self.graph.add_edges_from(processed_edges) - logger.debug(f"Added {len(processed_edges)} edges to graph") - - # Save changes - await self.save_graph_to_file(self.filename) - except Exception as e: - logger.error(f"Failed to add edges: {e}") - raise - - async def get_edges(self, node_id: UUID): - """ - Retrieve edges connected to a specific node. 
- - Parameters: - ----------- - - - node_id (UUID): The identifier of the node whose edges are to be retrieved. - - Returns: - -------- - - A list of edges connected to the specified node. - """ - return list(self.graph.in_edges(node_id, data=True)) + list( - self.graph.out_edges(node_id, data=True) - ) - - async def delete_node(self, node_id: UUID) -> None: - """ - Remove a node and its associated edges from the graph, then persist the changes. - - Parameters: - ----------- - - - node_id (UUID): The identifier of the node to delete. - """ - - if self.graph.has_node(node_id): - # First remove all edges connected to the node - for edge in list(self.graph.edges(node_id, data=True)): - source, target, data = edge - self.graph.remove_edge(source, target, key=data.get("relationship_name")) - - # Then remove the node itself - self.graph.remove_node(node_id) - - # Save the updated graph state - await self.save_graph_to_file(self.filename) - else: - logger.error(f"Node {node_id} not found in graph") - - async def delete_nodes(self, node_ids: List[UUID]) -> None: - """ - Bulk delete nodes from the graph and persist the changes. - - Parameters: - ----------- - - - node_ids (List[UUID]): A list of node identifiers to delete. - """ - self.graph.remove_nodes_from(node_ids) - await self.save_graph_to_file(self.filename) - - async def get_disconnected_nodes(self) -> List[str]: - """ - Identify nodes that are not connected to any other nodes in the graph. - - Returns: - -------- - - - List[str]: A list of identifiers for disconnected nodes. - """ - connected_components = list(nx.weakly_connected_components(self.graph)) - - disconnected_nodes = [] - biggest_subgraph = max(connected_components, key=len) - - for component in connected_components: - if component != biggest_subgraph: - disconnected_nodes.extend(list(component)) - - return disconnected_nodes - - async def extract_node(self, node_id: UUID) -> dict: - """ - Retrieve data for a specific node based on its identifier. 
- - Parameters: - ----------- - - - node_id (UUID): The identifier of the node to retrieve. - - Returns: - -------- - - - dict: The data of the specified node, or None if not found. - """ - if self.graph.has_node(node_id): - return self.graph.nodes[node_id] - - return None - - async def extract_nodes(self, node_ids: List[UUID]) -> List[dict]: - """ - Retrieve data for multiple nodes based on their identifiers. - - Parameters: - ----------- - - - node_ids (List[UUID]): A list of node identifiers to retrieve data. - - Returns: - -------- - - - List[dict]: A list of data for each node identified that exists in the graph. - """ - return [self.graph.nodes[node_id] for node_id in node_ids if self.graph.has_node(node_id)] - - async def get_predecessors(self, node_id: UUID, edge_label: str = None) -> list: - """ - Retrieve the predecessor nodes of a specified node according to a specific edge label. - - Parameters: - ----------- - - - node_id (UUID): The identifier of the node for which to find predecessors. - - edge_label (str): The label for the edges connecting to predecessors; if None, all - predecessors are retrieved. (default None) - - Returns: - -------- - - - list: A list of predecessor nodes. - """ - if self.graph.has_node(node_id): - if edge_label is None: - return [ - self.graph.nodes[predecessor] - for predecessor in list(self.graph.predecessors(node_id)) - ] - - nodes = [] - - for predecessor_id in list(self.graph.predecessors(node_id)): - if self.graph.has_edge(predecessor_id, node_id, edge_label): - nodes.append(self.graph.nodes[predecessor_id]) - - return nodes - - async def get_successors(self, node_id: UUID, edge_label: str = None) -> list: - """ - Retrieve the successor nodes of a specified node according to a specific edge label. - - Parameters: - ----------- - - - node_id (UUID): The identifier of the node for which to find successors. - - edge_label (str): The label for the edges connecting to successors; if None, all - successors are retrieved. 
(default None) - - Returns: - -------- - - - list: A list of successor nodes. - """ - if self.graph.has_node(node_id): - if edge_label is None: - return [ - self.graph.nodes[successor] - for successor in list(self.graph.successors(node_id)) - ] - - nodes = [] - - for successor_id in list(self.graph.successors(node_id)): - if self.graph.has_edge(node_id, successor_id, edge_label): - nodes.append(self.graph.nodes[successor_id]) - - return nodes - - async def get_neighbors(self, node_id: UUID) -> list: - """ - Get the neighboring nodes of a specified node, including both predecessors and - successors. - - Parameters: - ----------- - - - node_id (UUID): The identifier of the node whose neighbors are to be retrieved. - - Returns: - -------- - - - list: A list of neighboring nodes. - """ - if not self.graph.has_node(node_id): - return [] - - predecessors, successors = await asyncio.gather( - self.get_predecessors(node_id), - self.get_successors(node_id), - ) - - neighbors = predecessors + successors - - return neighbors - - async def get_connections(self, node_id: UUID) -> list: - """ - Get the connections of a specified node to its neighbors. - - Parameters: - ----------- - - - node_id (UUID): The identifier of the node for which to get connections. - - Returns: - -------- - - - list: A list of connections involving the specified node and its neighbors. 
- """ - if not self.graph.has_node(node_id): - return [] - - node = self.graph.nodes[node_id] - - if "id" not in node: - return [] - - predecessors, successors = await asyncio.gather( - self.get_predecessors(node_id), - self.get_successors(node_id), - ) - - connections = [] - - # Handle None values for predecessors and successors - if predecessors is not None: - for neighbor in predecessors: - if "id" in neighbor: - edge_data = self.graph.get_edge_data(neighbor["id"], node["id"]) - if edge_data is not None: - for edge_properties in edge_data.values(): - connections.append((neighbor, edge_properties, node)) - - if successors is not None: - for neighbor in successors: - if "id" in neighbor: - edge_data = self.graph.get_edge_data(node["id"], neighbor["id"]) - if edge_data is not None: - for edge_properties in edge_data.values(): - connections.append((node, edge_properties, neighbor)) - - return connections - - async def remove_connection_to_predecessors_of( - self, node_ids: list[UUID], edge_label: str - ) -> None: - """ - Remove connections to predecessors of specified nodes based on an edge label and persist - changes. - - Parameters: - ----------- - - - node_ids (list[UUID]): A list of node identifiers whose predecessor connections - need to be removed. - - edge_label (str): The label of the edges to remove. - """ - for node_id in node_ids: - if self.graph.has_node(node_id): - for predecessor_id in list(self.graph.predecessors(node_id)): - if self.graph.has_edge(predecessor_id, node_id, edge_label): - self.graph.remove_edge(predecessor_id, node_id, edge_label) - - await self.save_graph_to_file(self.filename) - - async def remove_connection_to_successors_of( - self, node_ids: list[UUID], edge_label: str - ) -> None: - """ - Remove connections to successors of specified nodes based on an edge label and persist - changes. - - Parameters: - ----------- - - - node_ids (list[UUID]): A list of node identifiers whose successor connections need - to be removed. 
- - edge_label (str): The label of the edges to remove. - """ - for node_id in node_ids: - if self.graph.has_node(node_id): - for successor_id in list(self.graph.successors(node_id)): - if self.graph.has_edge(node_id, successor_id, edge_label): - self.graph.remove_edge(node_id, successor_id, edge_label) - - await self.save_graph_to_file(self.filename) - - async def create_empty_graph(self, file_path: str) -> None: - """ - Initialize an empty graph and save it to a specified file path. - - Parameters: - ----------- - - - file_path (str): The file path where the empty graph should be saved. - """ - self.graph = nx.MultiDiGraph() - - await self.save_graph_to_file(file_path) - - async def save_graph_to_file(self, file_path: str = None) -> None: - """ - Save the graph data asynchronously to a specified file in JSON format. - - Parameters: - ----------- - - - file_path (str): The file path to save the graph data; if None, saves to the - default filename. (default None) - """ - if not file_path: - file_path = self.filename - - graph_data = nx.readwrite.json_graph.node_link_data(self.graph, edges="links") - - file_dir_path = os.path.dirname(file_path) - file_path = os.path.basename(file_path) - - file_storage = get_file_storage(file_dir_path) - - json_data = json.dumps(graph_data, cls=JSONEncoder) - - await file_storage.store(file_path, json_data, overwrite=True) - - async def load_graph_from_file(self, file_path: str = None): - """ - Load graph data asynchronously from a specified file in JSON format. - - Parameters: - ----------- - - - file_path (str): The file path from which to load the graph data; if None, loads - from the default filename. 
(default None) - """ - if not file_path: - file_path = self.filename - try: - file_dir_path = os.path.dirname(file_path) - file_name = os.path.basename(file_path) - - file_storage = get_file_storage(file_dir_path) - - if await file_storage.file_exists(file_name): - async with file_storage.open(file_name, "r") as file: - graph_data = json.loads(file.read()) - for node in graph_data["nodes"]: - try: - if not isinstance(node["id"], UUID): - try: - node["id"] = UUID(node["id"]) - except Exception: - # If conversion fails, keep the original id - pass - except Exception as e: - logger.error(e) - raise e - - if isinstance(node.get("updated_at"), int): - node["updated_at"] = datetime.fromtimestamp( - node["updated_at"] / 1000, tz=timezone.utc - ) - elif isinstance(node.get("updated_at"), str): - node["updated_at"] = datetime.strptime( - node["updated_at"], "%Y-%m-%dT%H:%M:%S.%f%z" - ) - - for edge in graph_data["links"]: - try: - if not isinstance(edge["source"], UUID): - source_id = parse_id(edge["source"]) - else: - source_id = edge["source"] - - if not isinstance(edge["target"], UUID): - target_id = parse_id(edge["target"]) - else: - target_id = edge["target"] - - edge["source"] = source_id - edge["target"] = target_id - edge["source_node_id"] = source_id - edge["target_node_id"] = target_id - except Exception as e: - logger.error(e) - raise e - - if isinstance( - edge.get("updated_at"), int - ): # Handle timestamp in milliseconds - edge["updated_at"] = datetime.fromtimestamp( - edge["updated_at"] / 1000, tz=timezone.utc - ) - elif isinstance(edge.get("updated_at"), str): - edge["updated_at"] = datetime.strptime( - edge["updated_at"], "%Y-%m-%dT%H:%M:%S.%f%z" - ) - - self.graph = nx.readwrite.json_graph.node_link_graph(graph_data, edges="links") - - for node_id, node_data in self.graph.nodes(data=True): - node_data["id"] = node_id - else: - # Log that the file does not exist and an empty graph is initialized - logger.warning("File %s not found. 
Initializing an empty graph.", file_path) - await self.create_empty_graph(file_path) - - except Exception: - logger.error("Failed to load graph from file: %s", file_path) - - await self.create_empty_graph(file_path) - - async def delete_graph(self, file_path: str = None): - """ - Delete the graph file from the filesystem asynchronously. - - Parameters: - ----------- - - - file_path (str): The file path of the graph to delete; if None, deletes the - default graph file. (default None) - """ - if file_path is None: - file_path = ( - self.filename - ) # Assuming self.filename is defined elsewhere and holds the default graph file path - try: - file_dir_path = os.path.dirname(file_path) - file_name = os.path.basename(file_path) - - file_storage = get_file_storage(file_dir_path) - - await file_storage.remove(file_name) - - self.graph = None - logger.info("Graph deleted successfully.") - except Exception as error: - logger.error("Failed to delete graph: %s", error) - raise error - - async def get_nodeset_subgraph( - self, node_type: Type[Any], node_name: List[str] - ) -> Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]]: - """ - Obtain a subgraph based on specific node types and names. Not supported in this - implementation. - - Parameters: - ----------- - - - node_type (Type[Any]): The type of nodes to include in the subgraph. - - node_name (List[str]): A list of node names to filter by. - """ - raise NodesetFilterNotSupportedError - - async def get_filtered_graph_data( - self, attribute_filters: List[Dict[str, List[Union[str, int]]]] - ): - """ - Fetch nodes and relationships filtered by specified attributes. - - Parameters: - ----------- - - - attribute_filters (List[Dict[str, List[Union[str, int]]]]): A list of dictionaries - defining attributes to filter on. - - Returns: - -------- - - A tuple containing filtered nodes and edges based on the specified attributes. 
- """ - # Create filters for nodes based on the attribute filters - where_clauses = [] - for attribute, values in attribute_filters[0].items(): - where_clauses.append((attribute, values)) - - # Filter nodes - filtered_nodes = [ - (node, data) - for node, data in self.graph.nodes(data=True) - if all(data.get(attr) in values for attr, values in where_clauses) - ] - - # Filter edges where both source and target nodes satisfy the filters - filtered_edges = [ - (source, target, data.get("relationship_type", "UNKNOWN"), data) - for source, target, data in self.graph.edges(data=True) - if ( - all(self.graph.nodes[source].get(attr) in values for attr, values in where_clauses) - and all( - self.graph.nodes[target].get(attr) in values for attr, values in where_clauses - ) - ) - ] - - return filtered_nodes, filtered_edges - - async def get_graph_metrics(self, include_optional=False): - """ - Calculate various metrics related to the graph, optionally including optional metrics. - - Parameters: - ----------- - - - include_optional: Indicates whether optional metrics should be included in the - calculation. (default False) - - Returns: - -------- - - A dictionary containing the calculated graph metrics. 
- """ - graph = self.graph - - def _get_mean_degree(graph): - degrees = [d for _, d in graph.degree()] - return np.mean(degrees) if degrees else 0 - - def _get_edge_density(graph): - num_nodes = graph.number_of_nodes() - num_edges = graph.number_of_edges() - num_possible_edges = num_nodes * (num_nodes - 1) - edge_density = num_edges / num_possible_edges if num_possible_edges > 0 else 0 - return edge_density - - def _get_diameter(graph): - try: - return nx.diameter(nx.DiGraph(graph.to_undirected())) - except Exception as e: - logger.warning("Failed to calculate diameter: %s", e) - return None - - def _get_avg_shortest_path_length(graph): - try: - return nx.average_shortest_path_length(nx.DiGraph(graph.to_undirected())) - except Exception as e: - logger.warning("Failed to calculate average shortest path length: %s", e) - return None - - def _get_avg_clustering(graph): - try: - return nx.average_clustering(nx.DiGraph(graph.to_undirected())) - except Exception as e: - logger.warning("Failed to calculate clustering coefficient: %s", e) - return None - - mandatory_metrics = { - "num_nodes": graph.number_of_nodes(), - "num_edges": graph.number_of_edges(), - "mean_degree": _get_mean_degree(graph), - "edge_density": _get_edge_density(graph), - "num_connected_components": nx.number_weakly_connected_components(graph), - "sizes_of_connected_components": [ - len(c) for c in nx.weakly_connected_components(graph) - ], - } - - if include_optional: - optional_metrics = { - "num_selfloops": sum(1 for u, v in graph.edges() if u == v), - "diameter": _get_diameter(graph), - "avg_shortest_path_length": _get_avg_shortest_path_length(graph), - "avg_clustering": _get_avg_clustering(graph), - } - else: - optional_metrics = { - "num_selfloops": -1, - "diameter": -1, - "avg_shortest_path_length": -1, - "avg_clustering": -1, - } - - return mandatory_metrics | optional_metrics - - async def get_document_subgraph(self, data_id: str): - """ - Retrieve all relevant nodes when a document is being 
deleted, including chunks and - orphaned entities. - - Parameters: - ----------- - - - data_id(str): The data id identifying the document to fetch - related nodes for. - - Returns: - -------- - - A dictionary containing the document, its chunks, orphan entities, made from nodes, - and orphan types. - """ - # Ensure graph is loaded - if self.graph is None: - await self.load_graph_from_file() - - # Find the document node by looking for content_hash in the name field - document = None - document_node_id = None - for node_id, attrs in self.graph.nodes(data=True): - if ( - attrs.get("type") in ["TextDocument", "PdfDocument"] - and attrs.get("id") == f"{data_id}" - ): - document = {"id": str(node_id), **attrs} # Convert UUID to string for consistency - document_node_id = node_id # Keep the original UUID - break - - if not document: - return None - - # Find chunks connected via is_part_of (chunks point TO document) - chunks = [] - for source, target, edge_data in self.graph.in_edges(document_node_id, data=True): - if edge_data.get("relationship_name") == "is_part_of": - chunks.append({"id": source, **self.graph.nodes[source]}) # Keep as UUID object - - # Find entities connected to chunks (chunks point TO entities via contains) - entities = [] - for chunk in chunks: - chunk_id = chunk["id"] # Already a UUID object - for source, target, edge_data in self.graph.out_edges(chunk_id, data=True): - if edge_data.get("relationship_name") == "contains": - entities.append( - {"id": target, **self.graph.nodes[target]} - ) # Keep as UUID object - - # Find orphaned entities (entities only connected to chunks we're deleting) - orphan_entities = [] - for entity in entities: - entity_id = entity["id"] # Already a UUID object - # Get all chunks that contain this entity - containing_chunks = [] - for source, target, edge_data in self.graph.in_edges(entity_id, data=True): - if edge_data.get("relationship_name") == "contains": - containing_chunks.append(source) # Keep as UUID object - - # 
Check if all containing chunks are in our chunks list - chunk_ids = [chunk["id"] for chunk in chunks] - if containing_chunks and all(c in chunk_ids for c in containing_chunks): - orphan_entities.append(entity) - - # Find orphaned entity types - orphan_types = [] - seen_types = set() # Track seen types to avoid duplicates - for entity in orphan_entities: - entity_id = entity["id"] # Already a UUID object - for _, target, edge_data in self.graph.out_edges(entity_id, data=True): - if edge_data.get("relationship_name") in ["is_a", "instance_of"]: - # Check if this type is only connected to entities we're deleting - type_node = self.graph.nodes[target] - if type_node.get("type") == "EntityType" and target not in seen_types: - is_orphaned = True - # Get all incoming edges to this type node - for source, _, edge_data in self.graph.in_edges(target, data=True): - if edge_data.get("relationship_name") in ["is_a", "instance_of"]: - # Check if the source entity is not in our orphan_entities list - if source not in [e["id"] for e in orphan_entities]: - is_orphaned = False - break - if is_orphaned: - orphan_types.append({"id": target, **type_node}) # Keep as UUID object - seen_types.add(target) # Mark as seen - - # Find nodes connected via made_from (chunks point TO summaries) - made_from_nodes = [] - for chunk in chunks: - chunk_id = chunk["id"] # Already a UUID object - for source, target, edge_data in self.graph.in_edges(chunk_id, data=True): - if edge_data.get("relationship_name") == "made_from": - made_from_nodes.append( - {"id": source, **self.graph.nodes[source]} - ) # Keep as UUID object - - # Return UUIDs directly without string conversion - return { - "document": [{"id": document["id"], **{k: v for k, v in document.items() if k != "id"}}] - if document - else [], - "chunks": [ - {"id": chunk["id"], **{k: v for k, v in chunk.items() if k != "id"}} - for chunk in chunks - ], - "orphan_entities": [ - {"id": entity["id"], **{k: v for k, v in entity.items() if k != "id"}} - 
for entity in orphan_entities - ], - "made_from_nodes": [ - {"id": node["id"], **{k: v for k, v in node.items() if k != "id"}} - for node in made_from_nodes - ], - "orphan_types": [ - {"id": type_node["id"], **{k: v for k, v in type_node.items() if k != "id"}} - for type_node in orphan_types - ], - } - - async def get_degree_one_nodes(self, node_type: str): - """ - Retrieve nodes that have only a single connection, filtered by node type. - - Parameters: - ----------- - - - node_type (str): Type of nodes to filter by ('Entity' or 'EntityType'). - - Returns: - -------- - - A list of nodes that have a single connection of the specified type. - """ - if not node_type or node_type not in ["Entity", "EntityType"]: - raise ValueError("node_type must be either 'Entity' or 'EntityType'") - - nodes = [] - for node_id, node_data in self.graph.nodes(data=True): - if node_data.get("type") == node_type: - # Count both incoming and outgoing edges - degree = self.graph.degree(node_id) - if degree == 1: - nodes.append(node_data) - return nodes - - async def get_node(self, node_id: UUID) -> dict: - """ - Retrieve the details of a specific node identified by its identifier. - - Parameters: - ----------- - - - node_id (UUID): The identifier of the node to retrieval. - - Returns: - -------- - - - dict: The data of the specified node if found, otherwise None. - """ - if self.graph.has_node(node_id): - return self.graph.nodes[node_id] - return None - - async def get_nodes(self, node_ids: List[UUID] = None) -> List[dict]: - """ - Retrieve data for multiple nodes by their identifiers, or all nodes if no identifiers - are provided. - - Parameters: - ----------- - - - node_ids (List[UUID]): List of node identifiers to fetch data for; if None, - retrieves all nodes in the graph. (default None) - - Returns: - -------- - - - List[dict]: A list of node data for each found node. 
- """ - if node_ids is None: - return [{"id": node_id, **data} for node_id, data in self.graph.nodes(data=True)] - return [ - {"id": node_id, **self.graph.nodes[node_id]} - for node_id in node_ids - if self.graph.has_node(node_id) - ] diff --git a/cognee/modules/retrieval/cypher_search_retriever.py b/cognee/modules/retrieval/cypher_search_retriever.py index 2329bae51..b885891e8 100644 --- a/cognee/modules/retrieval/cypher_search_retriever.py +++ b/cognee/modules/retrieval/cypher_search_retriever.py @@ -1,6 +1,5 @@ from typing import Any, Optional from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter from cognee.modules.retrieval.base_retriever import BaseRetriever from cognee.modules.retrieval.utils.completion import generate_completion from cognee.modules.retrieval.exceptions import SearchTypeNotSupported, CypherSearchError @@ -31,8 +30,7 @@ class CypherSearchRetriever(BaseRetriever): """ Retrieves relevant context using a cypher query. - If the graph engine is an instance of NetworkXAdapter, raises SearchTypeNotSupported. If - any error occurs during execution, logs the error and raises CypherSearchError. + If any error occurs during execution, logs the error and raises CypherSearchError. Parameters: ----------- @@ -46,12 +44,6 @@ class CypherSearchRetriever(BaseRetriever): """ try: graph_engine = await get_graph_engine() - - if isinstance(graph_engine, NetworkXAdapter): - raise SearchTypeNotSupported( - "CYPHER search type not supported for NetworkXAdapter." 
- ) - result = await graph_engine.query(query) except Exception as e: logger.error("Failed to execture cypher search retrieval: %s", str(e)) diff --git a/cognee/modules/retrieval/natural_language_retriever.py b/cognee/modules/retrieval/natural_language_retriever.py index d9a77bd7e..276d64de3 100644 --- a/cognee/modules/retrieval/natural_language_retriever.py +++ b/cognee/modules/retrieval/natural_language_retriever.py @@ -1,8 +1,8 @@ from typing import Any, Optional from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter -from cognee.infrastructure.llm.LLMGateway import LLMGateway +from cognee.infrastructure.llm.get_llm_client import get_llm_client +from cognee.infrastructure.llm.prompts import render_prompt from cognee.modules.retrieval.base_retriever import BaseRetriever from cognee.modules.retrieval.exceptions import SearchTypeNotSupported from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface @@ -122,10 +122,6 @@ class NaturalLanguageRetriever(BaseRetriever): query. 
""" graph_engine = await get_graph_engine() - - if isinstance(graph_engine, (NetworkXAdapter)): - raise SearchTypeNotSupported("Natural language search type not supported.") - return await self._execute_cypher_query(query, graph_engine) async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: diff --git a/cognee/modules/visualization/cognee_network_visualization.py b/cognee/modules/visualization/cognee_network_visualization.py index dde2fe98d..3ac596494 100644 --- a/cognee/modules/visualization/cognee_network_visualization.py +++ b/cognee/modules/visualization/cognee_network_visualization.py @@ -1,6 +1,5 @@ import os import json -import networkx from cognee.shared.logging_utils import get_logger from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage @@ -8,7 +7,37 @@ from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorag logger = get_logger() +def _import_networkx(): + """Dynamically import networkx with helpful error message if not available.""" + try: + import networkx + return networkx + except ImportError: + raise ImportError( + "NetworkX is required for graph visualization but is not installed. " + "Please install it with: pip install 'cognee[visualization]' or pip install networkx" + ) + + async def cognee_network_visualization(graph_data, destination_file_path: str = None): + """ + Generate an interactive HTML visualization of the graph data. + + This function requires NetworkX to be installed. 
If you don't have NetworkX installed, + you can install it with: pip install 'cognee[visualization]' or pip install networkx + + Args: + graph_data: Tuple of (nodes_data, edges_data) + destination_file_path: Optional path to save the HTML file + + Returns: + str: Path to the generated HTML file + + Raises: + ImportError: If NetworkX is not installed + """ + networkx = _import_networkx() + nodes_data, edges_data = graph_data G = networkx.DiGraph() diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index f965119fd..fb4193a8c 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -3,7 +3,6 @@ import os import requests from datetime import datetime, timezone -import networkx as nx import matplotlib.pyplot as plt import http.server import socketserver diff --git a/pyproject.toml b/pyproject.toml index 095349ff3..c07790e9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,6 @@ dependencies = [ "pypdf>=4.1.0,<6.0.0", "jinja2>=3.1.3,<4", "matplotlib>=3.8.3,<4", - "networkx>=3.4.2,<4", "lancedb>=0.24.0,<1.0.0", "alembic>=1.13.3,<2", "pre-commit>=4.0.1,<5", @@ -120,6 +119,7 @@ gui = [ "qasync>=0.27.1,<0.28", ] graphiti = ["graphiti-core>=0.7.0,<0.8"] +visualization = ["networkx>=3.4.2,<4"] # Note: New s3fs and boto3 versions don't work well together # Always use comaptible fixed versions of these two dependencies aws = ["s3fs[boto3]==2025.3.2"] From 85c7f11eb804e5c6ef725fb8731d757935bf5f41 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 15 Aug 2025 10:23:57 +0100 Subject: [PATCH 43/51] Keep networkx as core dependency --- .../cognee_network_visualization.py | 31 +------------------ pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 31 deletions(-) diff --git a/cognee/modules/visualization/cognee_network_visualization.py b/cognee/modules/visualization/cognee_network_visualization.py index 3ac596494..dde2fe98d 100644 --- a/cognee/modules/visualization/cognee_network_visualization.py +++ 
b/cognee/modules/visualization/cognee_network_visualization.py @@ -1,5 +1,6 @@ import os import json +import networkx from cognee.shared.logging_utils import get_logger from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage @@ -7,37 +8,7 @@ from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorag logger = get_logger() -def _import_networkx(): - """Dynamically import networkx with helpful error message if not available.""" - try: - import networkx - return networkx - except ImportError: - raise ImportError( - "NetworkX is required for graph visualization but is not installed. " - "Please install it with: pip install 'cognee[visualization]' or pip install networkx" - ) - - async def cognee_network_visualization(graph_data, destination_file_path: str = None): - """ - Generate an interactive HTML visualization of the graph data. - - This function requires NetworkX to be installed. If you don't have NetworkX installed, - you can install it with: pip install 'cognee[visualization]' or pip install networkx - - Args: - graph_data: Tuple of (nodes_data, edges_data) - destination_file_path: Optional path to save the HTML file - - Returns: - str: Path to the generated HTML file - - Raises: - ImportError: If NetworkX is not installed - """ - networkx = _import_networkx() - nodes_data, edges_data = graph_data G = networkx.DiGraph() diff --git a/pyproject.toml b/pyproject.toml index c07790e9e..095349ff3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "pypdf>=4.1.0,<6.0.0", "jinja2>=3.1.3,<4", "matplotlib>=3.8.3,<4", + "networkx>=3.4.2,<4", "lancedb>=0.24.0,<1.0.0", "alembic>=1.13.3,<2", "pre-commit>=4.0.1,<5", @@ -119,7 +120,6 @@ gui = [ "qasync>=0.27.1,<0.28", ] graphiti = ["graphiti-core>=0.7.0,<0.8"] -visualization = ["networkx>=3.4.2,<4"] # Note: New s3fs and boto3 versions don't work well together # Always use comaptible fixed versions of these two dependencies aws = 
["s3fs[boto3]==2025.3.2"] From 50ebcedf37dd9d50f340addd64783878db4196ba Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 15 Aug 2025 10:54:11 +0100 Subject: [PATCH 44/51] fix: natural_language_retriever.py --- cognee/modules/retrieval/natural_language_retriever.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cognee/modules/retrieval/natural_language_retriever.py b/cognee/modules/retrieval/natural_language_retriever.py index 276d64de3..38d19eb56 100644 --- a/cognee/modules/retrieval/natural_language_retriever.py +++ b/cognee/modules/retrieval/natural_language_retriever.py @@ -1,8 +1,7 @@ from typing import Any, Optional from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.infrastructure.llm.get_llm_client import get_llm_client -from cognee.infrastructure.llm.prompts import render_prompt +from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.modules.retrieval.base_retriever import BaseRetriever from cognee.modules.retrieval.exceptions import SearchTypeNotSupported from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface @@ -122,6 +121,10 @@ class NaturalLanguageRetriever(BaseRetriever): query. 
""" graph_engine = await get_graph_engine() + + if isinstance(graph_engine, (NetworkXAdapter)): + raise SearchTypeNotSupported("Natural language search type not supported.") + return await self._execute_cypher_query(query, graph_engine) async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: From 8d045b942e2e52f70c77a9ae3d89c92d96c4236a Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 15 Aug 2025 10:56:19 +0100 Subject: [PATCH 45/51] chore: forgot to remove NetworkX check in NLPRetriever --- cognee/modules/retrieval/natural_language_retriever.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cognee/modules/retrieval/natural_language_retriever.py b/cognee/modules/retrieval/natural_language_retriever.py index 38d19eb56..9797c28a0 100644 --- a/cognee/modules/retrieval/natural_language_retriever.py +++ b/cognee/modules/retrieval/natural_language_retriever.py @@ -122,9 +122,6 @@ class NaturalLanguageRetriever(BaseRetriever): """ graph_engine = await get_graph_engine() - if isinstance(graph_engine, (NetworkXAdapter)): - raise SearchTypeNotSupported("Natural language search type not supported.") - return await self._execute_cypher_query(query, graph_engine) async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: From 8825aaad9f0691132fe03681d94eedc147ffad90 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 15 Aug 2025 11:01:08 +0100 Subject: [PATCH 46/51] format: ruff format --- cognee/infrastructure/databases/graph/get_graph_engine.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py index 229fb27c4..4ec0eb483 100644 --- a/cognee/infrastructure/databases/graph/get_graph_engine.py +++ b/cognee/infrastructure/databases/graph/get_graph_engine.py @@ -21,7 +21,6 @@ async def get_graph_engine() -> GraphDBInterface: if hasattr(graph_client, "initialize"): await graph_client.initialize() 
- return graph_client From 1ee697b00090f1600960af825384c92825d4c310 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 15 Aug 2025 12:27:03 +0200 Subject: [PATCH 47/51] fix: Return distributed as part of Cognee build --- cognee/infrastructure/loaders/LoaderInterface.py | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cognee/infrastructure/loaders/LoaderInterface.py b/cognee/infrastructure/loaders/LoaderInterface.py index f0e91adee..3a1c9bf3e 100644 --- a/cognee/infrastructure/loaders/LoaderInterface.py +++ b/cognee/infrastructure/loaders/LoaderInterface.py @@ -58,7 +58,7 @@ class LoaderInterface(ABC): pass @abstractmethod - async def load(self, file_path: str, file_stream: Optional[Any] = None, **kwargs): + async def load(self, file_path: str, **kwargs): """ Load and process the file, returning standardized result. diff --git a/pyproject.toml b/pyproject.toml index 095349ff3..96013fb6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "cognee" -version = "0.2.2" +version = "0.2.3.dev0" description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning." authors = [ { name = "Vasilije Markovic" }, @@ -156,7 +156,6 @@ exclude = [ "/.data", "/.github", "/alembic", - "/distributed", "/deployment", "/cognee-mcp", "/cognee-frontend", From 86e51350a4f1ffba6ca703ebc8ccc5a895108455 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 15 Aug 2025 13:42:48 +0200 Subject: [PATCH 48/51] chore: Update Cognee version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 96013fb6e..3ec97506b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "cognee" -version = "0.2.3.dev0" +version = "0.2.3.dev1" description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning." 
authors = [ { name = "Vasilije Markovic" }, From 761bb1b523262c5ce6903d099141c43214d183b4 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 15 Aug 2025 12:59:49 +0100 Subject: [PATCH 49/51] test: deprecate networkx_metrics_test.py --- .../tasks/descriptive_metrics/networkx_metrics_test.py | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py diff --git a/cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py b/cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py deleted file mode 100644 index 32e61b7d9..000000000 --- a/cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +++ /dev/null @@ -1,7 +0,0 @@ -from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import assert_metrics -import asyncio - - -if __name__ == "__main__": - asyncio.run(assert_metrics(provider="networkx", include_optional=False)) - asyncio.run(assert_metrics(provider="networkx", include_optional=True)) From 5f7598d59d464e35afd06dbb0b0ecd0b3fa6fe33 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 15 Aug 2025 13:13:15 +0100 Subject: [PATCH 50/51] test: use neo4j_metrics_test in descriptive tests instead of networkx --- .github/workflows/examples_tests.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/examples_tests.yml b/.github/workflows/examples_tests.yml index 277ed8c21..7352654d3 100644 --- a/.github/workflows/examples_tests.yml +++ b/.github/workflows/examples_tests.yml @@ -71,7 +71,11 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - run: uv run python ./cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py + GRAPH_DATABASE_PROVIDER: "neo4j" + GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }} + GRAPH_DATABASE_USERNAME: "neo4j" + GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }} + run: 
uv run python ./cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py test-dynamic-steps-metrics: From e4e05128560b0a86fb7804fb6cc7c5990c26eaa2 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 15 Aug 2025 13:29:54 +0100 Subject: [PATCH 51/51] feat: add reusable GitHub Action to set up Neo4j with Graph Data Science for testing --- .github/actions/setup_neo4j/action.yml | 67 ++++++++++++++++++++++++++ .github/workflows/examples_tests.yml | 10 ++-- 2 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 .github/actions/setup_neo4j/action.yml diff --git a/.github/actions/setup_neo4j/action.yml b/.github/actions/setup_neo4j/action.yml new file mode 100644 index 000000000..20b7726b1 --- /dev/null +++ b/.github/actions/setup_neo4j/action.yml @@ -0,0 +1,67 @@ +name: 'Setup Neo4j with Graph Data Science' +description: 'Sets up a Neo4j instance with APOC and Graph Data Science plugins for testing' +inputs: + neo4j-version: + description: 'Neo4j version to use' + required: false + default: '5.21' + neo4j-password: + description: 'Password for Neo4j' + required: false + default: 'cognee_test_password' +outputs: + neo4j-url: + description: 'Neo4j connection URL' + value: 'bolt://localhost:7687' + neo4j-username: + description: 'Neo4j username' + value: 'neo4j' + neo4j-password: + description: 'Neo4j password' + value: ${{ inputs.neo4j-password }} +runs: + using: 'composite' + steps: + - name: Start Neo4j with GDS + shell: bash + run: | + docker run -d \ + --name neo4j-test \ + -p 7474:7474 -p 7687:7687 \ + -e NEO4J_AUTH="neo4j/${{ inputs.neo4j-password }}" \ + -e NEO4J_PLUGINS='["apoc", "graph-data-science"]' \ + -e NEO4J_dbms_security_procedures_unrestricted="apoc.*,gds.*" \ + -e NEO4J_apoc_export_file_enabled=true \ + -e NEO4J_apoc_import_file_enabled=true \ + neo4j:${{ inputs.neo4j-version }} + + - name: Wait for Neo4j to be ready + shell: bash + run: | + echo "Waiting for Neo4j to start..." 
+ timeout=60 + counter=0 + + while [ $counter -lt $timeout ]; do + if docker exec neo4j-test cypher-shell -u neo4j -p "${{ inputs.neo4j-password }}" "RETURN 1" > /dev/null 2>&1; then + echo "Neo4j is ready!" + break + fi + echo "Waiting... ($counter/$timeout)" + sleep 2 + counter=$((counter + 2)) + done + + if [ $counter -ge $timeout ]; then + echo "Neo4j failed to start within $timeout seconds" + docker logs neo4j-test + exit 1 + fi + + - name: Verify GDS is available + shell: bash + run: | + echo "Verifying Graph Data Science library is available..." + docker exec neo4j-test cypher-shell -u neo4j -p "${{ inputs.neo4j-password }}" \ + "CALL gds.version() YIELD gdsVersion RETURN gdsVersion" + echo "GDS verification complete!" diff --git a/.github/workflows/examples_tests.yml b/.github/workflows/examples_tests.yml index 7352654d3..1dc720f8e 100644 --- a/.github/workflows/examples_tests.yml +++ b/.github/workflows/examples_tests.yml @@ -60,6 +60,10 @@ jobs: with: python-version: '3.11.x' + - name: Setup Neo4j with GDS + uses: ./.github/actions/setup_neo4j + id: neo4j + - name: Run Descriptive Graph Metrics Example env: LLM_MODEL: ${{ secrets.LLM_MODEL }} @@ -72,9 +76,9 @@ jobs: EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} GRAPH_DATABASE_PROVIDER: "neo4j" - GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }} - GRAPH_DATABASE_USERNAME: "neo4j" - GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }} + GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }} + GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }} + GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }} run: uv run python ./cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py