From 544e08930b3ab43f5a936024e9d41542e60ec297 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 13 Aug 2025 14:42:57 +0200 Subject: [PATCH] feat: removing invalidValueErrors --- cognee-mcp/src/server.py | 16 ----------- cognee/api/v1/add/add.py | 5 ---- cognee/api/v1/cognify/cognify.py | 8 ------ cognee/api/v1/search/search.py | 7 ----- .../data/utils/extract_keywords.py | 2 +- .../databases/exceptions/__init__.py | 2 ++ .../databases/exceptions/exceptions.py | 28 +++++++++++++++++++ .../hybrid/falkordb/FalkorDBAdapter.py | 4 +-- .../NeptuneAnalyticsAdapter.py | 9 +++--- .../vector/chromadb/ChromaDBAdapter.py | 4 +-- .../vector/lancedb/LanceDBAdapter.py | 4 +-- .../vector/pgvector/PGVectorAdapter.py | 3 +- .../databases/vector/qdrant/QDrantAdapter.py | 6 ++-- .../vector/weaviate_db/WeaviateAdapter.py | 6 ++-- .../llm/anthropic/adapter.py | 4 +-- .../litellm_instructor/llm/gemini/adapter.py | 6 ++-- .../litellm_instructor/llm/get_llm_client.py | 4 +-- .../litellm_instructor/llm/openai/adapter.py | 2 +- .../graph/cognee_graph_elements_test.py | 11 ++++---- 19 files changed, 62 insertions(+), 69 deletions(-) diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index a657225f5..3e65a5eb7 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -221,14 +221,6 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str - The actual cognify process may take significant time depending on text length - Use the cognify_status tool to check the progress of the operation - Raises - ------ - InvalidValueError - If LLM_API_KEY is not set - ValueError - If chunks exceed max token limits (reduce chunk_size) - DatabaseNotCreatedError - If databases are not properly initialized """ async def cognify_task( @@ -512,14 +504,6 @@ async def search(search_query: str, search_type: str) -> list: - Different search types produce different output formats - The function handles the conversion 
between Cognee's internal result format and MCP's output format - Raises - ------ - InvalidValueError - If LLM_API_KEY is not set (for LLM-based search types) - ValueError - If query_text is empty or search parameters are invalid - NoDataError - If no relevant data found for the search query """ async def search_task(search_query: str, search_type: str) -> str: diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 3e4aaae49..a9ad42923 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -133,11 +133,6 @@ async def add( - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "qdrant", "weaviate" - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx" - Raises: - FileNotFoundError: If specified file paths don't exist - PermissionError: If user lacks access to files or dataset - UnsupportedFileTypeError: If file format cannot be processed - InvalidValueError: If LLM_API_KEY is not set or invalid """ tasks = [ Task(resolve_data_directories, include_subdirectories=True), diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index c6508f3a7..23984b9a6 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -177,14 +177,6 @@ async def cognify( - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False) - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) - - Raises: - DatasetNotFoundError: If specified datasets don't exist - PermissionError: If user lacks processing rights - InvalidValueError: If LLM_API_KEY is not set - OntologyParsingError: If ontology file is malformed - ValueError: If chunks exceed max token limits (reduce chunk_size) - DatabaseNotCreatedError: If databases are not properly initialized """ tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 
66ce48cc2..f4f4831c1 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -158,13 +158,6 @@ async def search( - VECTOR_DB_PROVIDER: Must match what was used during cognify - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify - Raises: - DatasetNotFoundError: If specified datasets don't exist or aren't accessible - PermissionDeniedError: If user lacks read access to requested datasets - NoDataError: If no relevant data found for the search query - InvalidValueError: If LLM_API_KEY is not set (for LLM-based search types) - ValueError: If query_text is empty or search parameters are invalid - CollectionNotFoundError: If vector collection not found (data not processed) """ # We use lists from now on for datasets if isinstance(datasets, UUID) or isinstance(datasets, str): diff --git a/cognee/infrastructure/data/utils/extract_keywords.py b/cognee/infrastructure/data/utils/extract_keywords.py index 2915131a4..8085459c9 100644 --- a/cognee/infrastructure/data/utils/extract_keywords.py +++ b/cognee/infrastructure/data/utils/extract_keywords.py @@ -8,7 +8,7 @@ def extract_keywords(text: str) -> list[str]: """ Extract keywords from the provided text string. - This function raises an InvalidValueError if the input text is empty. It processes the + This function raises a KeyWordExtractionError if the input text is empty. It processes the text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most relevant keywords based on their frequency. The function returns a list of up to 15 keywords, each having more than 3 characters. 
diff --git a/cognee/infrastructure/databases/exceptions/__init__.py b/cognee/infrastructure/databases/exceptions/__init__.py index c7d2a8feb..56deaac74 100644 --- a/cognee/infrastructure/databases/exceptions/__init__.py +++ b/cognee/infrastructure/databases/exceptions/__init__.py @@ -9,4 +9,6 @@ from .exceptions import ( EntityAlreadyExistsError, DatabaseNotCreatedError, EmbeddingException, + MissingQueryParameterError, + MutuallyExclusiveQueryParametersError ) diff --git a/cognee/infrastructure/databases/exceptions/exceptions.py b/cognee/infrastructure/databases/exceptions/exceptions.py index 66740fa5e..6e1bb74f6 100644 --- a/cognee/infrastructure/databases/exceptions/exceptions.py +++ b/cognee/infrastructure/databases/exceptions/exceptions.py @@ -102,3 +102,31 @@ class EmbeddingException(CogneeConfigurationError): status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, ): super().__init__(message, name, status_code) + +class MissingQueryParameterError(CogneeValidationError): + """ + Raised when neither 'query_text' nor 'query_vector' is provided, + and at least one is required to perform the operation. + """ + def __init__( + self, + name: str = "MissingQueryParameterError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + message = "One of query_text or query_vector must be provided!" + super().__init__(message, name, status_code) + +class MutuallyExclusiveQueryParametersError(CogneeValidationError): + """ + Raised when both 'text' and 'embedding' are provided to the search function, + but only one type of input is allowed at a time. + """ + def __init__( + self, + name: str = "MutuallyExclusiveQueryParametersError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + message = ( + "The search function accepts either text or embedding as input, but not both." 
+ ) + super().__init__(message, name, status_code) diff --git a/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py b/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py index 35ce7c77e..cb6899925 100644 --- a/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +++ b/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py @@ -9,7 +9,7 @@ from typing import List, Dict, Any, Optional, Tuple, Type, Union from falkordb import FalkorDB -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.databases.graph.graph_db_interface import ( GraphDBInterface, record_graph_changes, @@ -721,7 +721,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface): Returns the search results as a result set from the graph database. """ if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_text and not query_vector: query_vector = (await self.embed_data([query_text]))[0] diff --git a/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py b/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py index a04e6f09e..4baf8ff13 100644 --- a/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +++ b/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py @@ -5,7 +5,8 @@ import json from typing import List, Optional, Any, Dict, Type, Tuple from uuid import UUID -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError +from cognee.infrastructure.databases.exceptions import MutuallyExclusiveQueryParametersError from cognee.infrastructure.databases.graph.neptune_driver.adapter import NeptuneGraphDB from 
cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface from cognee.infrastructure.engine import DataPoint @@ -274,11 +275,9 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface): limit = self._TOPK_UPPER_BOUND if query_vector and query_text: - raise InvalidValueError( - message="The search function accepts either text or embedding as input, but not both." - ) + raise MutuallyExclusiveQueryParametersError() elif query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() elif query_vector: embedding = query_vector else: diff --git a/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py b/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py index d4b858348..81f47dddd 100644 --- a/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +++ b/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py @@ -3,13 +3,13 @@ from uuid import UUID from typing import List, Optional from chromadb import AsyncHttpClient, Settings -from cognee.exceptions import InvalidValueError from cognee.shared.logging_utils import get_logger from cognee.modules.storage.utils import get_own_properties from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from ..embeddings.EmbeddingEngine import EmbeddingEngine from ..vector_db_interface import VectorDBInterface @@ -373,7 +373,7 @@ class ChromaDBAdapter(VectorDBInterface): Returns a list of ScoredResult instances representing the search results. 
""" if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_text and not query_vector: query_vector = (await self.embedding_engine.embed_text([query_text]))[0] diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py index f37c83113..0184ec3ee 100644 --- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py @@ -5,7 +5,7 @@ from pydantic import BaseModel from lancedb.pydantic import LanceModel, Vector from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.files.storage import get_file_storage @@ -228,7 +228,7 @@ class LanceDBAdapter(VectorDBInterface): normalized: bool = True, ): if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_text and not query_vector: query_vector = (await self.embedding_engine.embed_text([query_text]))[0] diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index 96b2056c4..4dfd9792f 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -17,6 +17,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine from distributed.utils import override_distributed from distributed.tasks.queued_add_data_points 
import queued_add_data_points +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from ...relational.ModelBase import Base from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter @@ -302,7 +303,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): with_vector: bool = False, ) -> List[ScoredResult]: if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_text and not query_vector: query_vector = (await self.embedding_engine.embed_text([query_text]))[0] diff --git a/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py b/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py index 716fc969f..aa1368716 100644 --- a/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +++ b/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py @@ -4,7 +4,7 @@ from qdrant_client import AsyncQdrantClient, models from cognee.shared.logging_utils import get_logger from cognee.infrastructure.engine.utils import parse_id -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult @@ -363,7 +363,7 @@ class QDrantAdapter(VectorDBInterface): Search for data points in a collection based on either a textual query or a vector query. - Raises InvalidValueError if both query_text and query_vector are None. + Raises MissingQueryParameterError if both query_text and query_vector are None. Returns a list of scored results that match the search criteria. 
@@ -388,7 +388,7 @@ class QDrantAdapter(VectorDBInterface): from qdrant_client.http.exceptions import UnexpectedResponse if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if not await self.has_collection(collection_name): return [] diff --git a/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py b/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py index 00a6a0411..db68e004e 100644 --- a/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +++ b/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py @@ -3,7 +3,7 @@ from typing import List, Optional from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential from cognee.shared.logging_utils import get_logger -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError @@ -390,7 +390,7 @@ class WeaviateAdapter(VectorDBInterface): """ Perform a search on a collection using either a text query or a vector query. - Return scored results based on the search criteria provided. Raise InvalidValueError if + Return scored results based on the search criteria provided. Raise MissingQueryParameterError if no query is provided. 
Parameters: @@ -413,7 +413,7 @@ class WeaviateAdapter(VectorDBInterface): import weaviate.exceptions if query_text is None and query_vector is None: - raise InvalidValueError(message="One of query_text or query_vector must be provided!") + raise MissingQueryParameterError() if query_vector is None: query_vector = (await self.embed_data([query_text]))[0] diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py index 636e6c0f2..6845fb6aa 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py @@ -2,7 +2,7 @@ from typing import Type from pydantic import BaseModel import instructor -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) @@ -89,7 +89,7 @@ class AnthropicAdapter(LLMInterface): if not text_input: text_input = "No user input provided." 
if not system_prompt: - raise InvalidValueError(message="No system prompt path provided.") + raise MissingSystemPromptPathError() system_prompt = LLMGateway.read_query_prompt(system_prompt) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py index 61d42ff5f..3cde1fdc4 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py @@ -5,7 +5,7 @@ from litellm import acompletion, JSONSchemaValidationError from cognee.shared.logging_utils import get_logger from cognee.modules.observability.get_observe import get_observe -from cognee.exceptions import InvalidValueError +from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) @@ -118,7 +118,7 @@ class GeminiAdapter(LLMInterface): """ Format and display the prompt for a user query. - Raises an InvalidValueError if no system prompt is provided. + Raises a MissingSystemPromptPathError if no system prompt is provided. Parameters: ----------- @@ -135,7 +135,7 @@ class GeminiAdapter(LLMInterface): if not text_input: text_input = "No user input provided." 
if not system_prompt: - raise InvalidValueError(message="No system prompt path provided.") + raise MissingSystemPromptPathError() system_prompt = LLMGateway.read_query_prompt(system_prompt) formatted_prompt = ( diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index 3006a795b..fd347aef3 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -38,7 +38,7 @@ def get_llm_client(): This function retrieves the configuration for the LLM provider and model, and initializes the appropriate LLM client adapter accordingly. It raises an - InvalidValueError if the LLM API key is not set for certain providers or if the provider + LLMAPIKeyNotSetError if the LLM API key is not set for certain providers or if the provider is unsupported. Returns: @@ -62,7 +62,7 @@ def get_llm_client(): if provider == LLMProvider.OPENAI: if llm_config.llm_api_key is None: - raise InvalidValueError(message="LLM API key is not set.") + raise LLMAPIKeyNotSetError() from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import ( OpenAIAdapter, diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 38e1bc82e..4126b3e13 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -307,7 +307,7 @@ class OpenAIAdapter(LLMInterface): Format and display the prompt for a user query. 
This method formats the prompt using the provided user input and system prompt, - returning a string representation. Raises InvalidValueError if the system prompt is not + returning a string representation. Raises MissingSystemPromptPathError if the system prompt is not provided. Parameters: diff --git a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py index 000856b12..a852bcee3 100644 --- a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +++ b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py @@ -1,9 +1,8 @@ import numpy as np import pytest -from cognee.exceptions import InvalidValueError from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node - +from cognee.modules.graph.exceptions import InvalidDimensionsError, DimensionOutOfRangeError def test_node_initialization(): """Test that a Node is initialized correctly.""" @@ -16,7 +15,7 @@ def test_node_initialization(): def test_node_invalid_dimension(): """Test that initializing a Node with a non-positive dimension raises an error.""" - with pytest.raises(InvalidValueError, match="Dimension must be a positive integer"): + with pytest.raises(InvalidDimensionsError, match="Dimensions must be a positive integers"): Node("node1", dimension=0) @@ -69,7 +68,7 @@ def test_is_node_alive_in_dimension(): def test_node_alive_invalid_dimension(): """Test that checking alive status with an invalid dimension raises an error.""" node = Node("node1", dimension=1) - with pytest.raises(InvalidValueError, match="Dimension 1 is out of range"): + with pytest.raises(DimensionOutOfRangeError, match="Dimension 1 is out of range"): node.is_node_alive_in_dimension(1) @@ -106,7 +105,7 @@ def test_edge_invalid_dimension(): """Test that initializing an Edge with a non-positive dimension raises an error.""" node1 = Node("node1") node2 = Node("node2") - with pytest.raises(InvalidValueError, match="Dimensions must be a 
positive integer."): + with pytest.raises(DimensionOutOfRangeError, match="Dimensions must be a positive integer."): Edge(node1, node2, dimension=0) @@ -125,7 +124,7 @@ def test_edge_alive_invalid_dimension(): node1 = Node("node1") node2 = Node("node2") edge = Edge(node1, node2, dimension=1) - with pytest.raises(InvalidValueError, match="Dimension 1 is out of range"): + with pytest.raises(DimensionOutOfRangeError, match="Dimension 1 is out of range"): edge.is_edge_alive_in_dimension(1)