feat: removing InvalidValueErrors

hajdul88 2025-08-13 14:42:57 +02:00
parent 38329da0e8
commit 544e08930b
19 changed files with 62 additions and 69 deletions

@ -221,14 +221,6 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
- The actual cognify process may take significant time depending on text length
- Use the cognify_status tool to check the progress of the operation
Raises
------
InvalidValueError
If LLM_API_KEY is not set
ValueError
If chunks exceed max token limits (reduce chunk_size)
DatabaseNotCreatedError
If databases are not properly initialized
"""
async def cognify_task(
@ -512,14 +504,6 @@ async def search(search_query: str, search_type: str) -> list:
- Different search types produce different output formats
- The function handles the conversion between Cognee's internal result format and MCP's output format
Raises
------
InvalidValueError
If LLM_API_KEY is not set (for LLM-based search types)
ValueError
If query_text is empty or search parameters are invalid
NoDataError
If no relevant data found for the search query
"""
async def search_task(search_query: str, search_type: str) -> str:

@ -133,11 +133,6 @@ async def add(
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "qdrant", "weaviate"
- GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx"
Raises:
FileNotFoundError: If specified file paths don't exist
PermissionError: If user lacks access to files or dataset
UnsupportedFileTypeError: If file format cannot be processed
InvalidValueError: If LLM_API_KEY is not set or invalid
"""
tasks = [
Task(resolve_data_directories, include_subdirectories=True),

@ -177,14 +177,6 @@ async def cognify(
- LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
Raises:
DatasetNotFoundError: If specified datasets don't exist
PermissionError: If user lacks processing rights
InvalidValueError: If LLM_API_KEY is not set
OntologyParsingError: If ontology file is malformed
ValueError: If chunks exceed max token limits (reduce chunk_size)
DatabaseNotCreatedError: If databases are not properly initialized
"""
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
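The docstring above names the environment variables cognify reads. A minimal configuration sketch based on those names; the provider and model values and the top-level cognee.cognify() call are illustrative assumptions, not part of this diff.

import asyncio
import os

# Variable names come from the docstrings in this commit; values are illustrative.
os.environ["LLM_PROVIDER"] = "openai"
os.environ["LLM_MODEL"] = "gpt-4o-mini"
os.environ["VECTOR_DB_PROVIDER"] = "lancedb"      # default per the add() docstring
os.environ["GRAPH_DATABASE_PROVIDER"] = "kuzu"    # default per the add() docstring
os.environ["LLM_RATE_LIMIT_ENABLED"] = "True"     # rate limiting is off by default
os.environ["LLM_RATE_LIMIT_REQUESTS"] = "60"      # max requests per interval

import cognee  # imported after the environment is set so the configuration picks it up

# cognify() is async and assumes data was previously added via cognee.add().
asyncio.run(cognee.cognify())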

@ -158,13 +158,6 @@ async def search(
- VECTOR_DB_PROVIDER: Must match what was used during cognify
- GRAPH_DATABASE_PROVIDER: Must match what was used during cognify
Raises:
DatasetNotFoundError: If specified datasets don't exist or aren't accessible
PermissionDeniedError: If user lacks read access to requested datasets
NoDataError: If no relevant data found for the search query
InvalidValueError: If LLM_API_KEY is not set (for LLM-based search types)
ValueError: If query_text is empty or search parameters are invalid
CollectionNotFoundError: If vector collection not found (data not processed)
"""
# We use lists from now on for datasets
if isinstance(datasets, UUID) or isinstance(datasets, str):

@ -8,7 +8,7 @@ def extract_keywords(text: str) -> list[str]:
"""
Extract keywords from the provided text string.
This function raises an InvalidValueError if the input text is empty. It processes the
This function raises a KeyWordExtractionError if the input text is empty. It processes the
text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
relevant keywords based on their frequency. The function returns a list of up to 15
keywords, each having more than 3 characters.
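A minimal usage sketch; the import paths below are assumptions, since the diff shows only the function's docstring and the renamed exception, not their packages.

# Import paths are hypothetical; adjust to wherever extract_keywords and
# KeyWordExtractionError actually live in the repository.
from cognee.shared.utils import extract_keywords
from cognee.shared.exceptions import KeyWordExtractionError

text = "Graph databases store nodes and edges and support fast traversal queries."
try:
    keywords = extract_keywords(text)   # up to 15 nouns longer than 3 characters
except KeyWordExtractionError:
    keywords = []                       # raised when the input text is empty
print(keywords)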

@ -9,4 +9,6 @@ from .exceptions import (
EntityAlreadyExistsError,
DatabaseNotCreatedError,
EmbeddingException,
MissingQueryParameterError,
MutuallyExclusiveQueryParametersError
)

@ -102,3 +102,31 @@ class EmbeddingException(CogneeConfigurationError):
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
):
super().__init__(message, name, status_code)
class MissingQueryParameterError(CogneeValidationError):
"""
Raised when neither 'query_text' nor 'query_vector' is provided,
and at least one is required to perform the operation.
"""
def __init__(
self,
name: str = "MissingQueryParameterError",
status_code: int = status.HTTP_400_BAD_REQUEST,
):
message = "One of query_text or query_vector must be provided!"
super().__init__(message, name, status_code)
class MutuallyExclusiveQueryParametersError(CogneeValidationError):
"""
Raised when both 'text' and 'embedding' are provided to the search function,
but only one type of input is allowed at a time.
"""
def __init__(
self,
name: str = "MutuallyExclusiveQueryParametersError",
status_code: int = status.HTTP_400_BAD_REQUEST,
):
message = (
"The search function accepts either text or embedding as input, but not both."
)
super().__init__(message, name, status_code)
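A caller-side sketch of how code driving an adapter's search() might react to the two new exceptions; the adapter variable and the keyword arguments are assumptions based on the adapter hunks later in this commit, not a verbatim API.

from cognee.infrastructure.databases.exceptions import (
    MissingQueryParameterError,
    MutuallyExclusiveQueryParametersError,
)


async def guarded_search(adapter, collection_name: str, query_text=None, query_vector=None):
    # adapter stands in for any of the adapters touched by this commit
    # (ChromaDB, LanceDB, PGVector, Qdrant, Weaviate, FalkorDB, Neptune).
    try:
        return await adapter.search(
            collection_name=collection_name,
            query_text=query_text,
            query_vector=query_vector,
        )
    except MissingQueryParameterError:
        # Neither query_text nor query_vector was supplied.
        return []
    except MutuallyExclusiveQueryParametersError:
        # Both were supplied (Neptune rejects this); retry with the text form only.
        return await adapter.search(collection_name=collection_name, query_text=query_text)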

@ -9,7 +9,7 @@ from typing import List, Dict, Any, Optional, Tuple, Type, Union
from falkordb import FalkorDB
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from cognee.infrastructure.databases.graph.graph_db_interface import (
GraphDBInterface,
record_graph_changes,
@ -721,7 +721,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
Returns the search results as a result set from the graph database.
"""
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
raise MissingQueryParameterError()
if query_text and not query_vector:
query_vector = (await self.embed_data([query_text]))[0]

@ -5,7 +5,8 @@ import json
from typing import List, Optional, Any, Dict, Type, Tuple
from uuid import UUID
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from cognee.infrastructure.databases.exceptions import MutuallyExclusiveQueryParametersError
from cognee.infrastructure.databases.graph.neptune_driver.adapter import NeptuneGraphDB
from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface
from cognee.infrastructure.engine import DataPoint
@ -274,11 +275,9 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
limit = self._TOPK_UPPER_BOUND
if query_vector and query_text:
raise InvalidValueError(
message="The search function accepts either text or embedding as input, but not both."
)
raise MutuallyExclusiveQueryParametersError()
elif query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
raise MissingQueryParameterError()
elif query_vector:
embedding = query_vector
else:

@ -3,13 +3,13 @@ from uuid import UUID
from typing import List, Optional
from chromadb import AsyncHttpClient, Settings
from cognee.exceptions import InvalidValueError
from cognee.shared.logging_utils import get_logger
from cognee.modules.storage.utils import get_own_properties
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.engine.utils import parse_id
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from ..embeddings.EmbeddingEngine import EmbeddingEngine
from ..vector_db_interface import VectorDBInterface
@ -373,7 +373,7 @@ class ChromaDBAdapter(VectorDBInterface):
Returns a list of ScoredResult instances representing the search results.
"""
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
raise MissingQueryParameterError()
if query_text and not query_vector:
query_vector = (await self.embedding_engine.embed_text([query_text]))[0]

@ -5,7 +5,7 @@ from pydantic import BaseModel
from lancedb.pydantic import LanceModel, Vector
from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.engine.utils import parse_id
from cognee.infrastructure.files.storage import get_file_storage
@ -228,7 +228,7 @@ class LanceDBAdapter(VectorDBInterface):
normalized: bool = True,
):
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
raise MissingQueryParameterError()
if query_text and not query_vector:
query_vector = (await self.embedding_engine.embed_text([query_text]))[0]

@ -17,6 +17,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
from distributed.utils import override_distributed
from distributed.tasks.queued_add_data_points import queued_add_data_points
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from ...relational.ModelBase import Base
from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
@ -302,7 +303,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
with_vector: bool = False,
) -> List[ScoredResult]:
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
raise MissingQueryParameterError()
if query_text and not query_vector:
query_vector = (await self.embedding_engine.embed_text([query_text]))[0]

@ -4,7 +4,7 @@ from qdrant_client import AsyncQdrantClient, models
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.engine.utils import parse_id
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
@ -363,7 +363,7 @@ class QDrantAdapter(VectorDBInterface):
Search for data points in a collection based on either a textual query or a vector
query.
Raises InvalidValueError if both query_text and query_vector are None.
Raises MissingQueryParameterError if both query_text and query_vector are None.
Returns a list of scored results that match the search criteria.
@ -388,7 +388,7 @@ class QDrantAdapter(VectorDBInterface):
from qdrant_client.http.exceptions import UnexpectedResponse
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
raise MissingQueryParameterError()
if not await self.has_collection(collection_name):
return []

@ -3,7 +3,7 @@ from typing import List, Optional
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
from cognee.shared.logging_utils import get_logger
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.engine.utils import parse_id
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
@ -390,7 +390,7 @@ class WeaviateAdapter(VectorDBInterface):
"""
Perform a search on a collection using either a text query or a vector query.
Return scored results based on the search criteria provided. Raise InvalidValueError if
Return scored results based on the search criteria provided. Raise MissingQueryParameterError if
no query is provided.
Parameters:
@ -413,7 +413,7 @@ class WeaviateAdapter(VectorDBInterface):
import weaviate.exceptions
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
raise MissingQueryParameterError()
if query_vector is None:
query_vector = (await self.embed_data([query_text]))[0]

@ -2,7 +2,7 @@ from typing import Type
from pydantic import BaseModel
import instructor
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
@ -89,7 +89,7 @@ class AnthropicAdapter(LLMInterface):
if not text_input:
text_input = "No user input provided."
if not system_prompt:
raise InvalidValueError(message="No system prompt path provided.")
raise MissingSystemPromptPathError()
system_prompt = LLMGateway.read_query_prompt(system_prompt)
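MissingSystemPromptPathError is imported from cognee.infrastructure.llm.exceptions, but its definition is not part of this diff. A hypothetical sketch, modeled on the validation errors added earlier in this commit; the base class and status import are assumptions.

from fastapi import status  # assumed source of the status constants used elsewhere in this commit

from cognee.exceptions import CogneeValidationError  # assumed base-class location


class MissingSystemPromptPathError(CogneeValidationError):
    """Raised when an LLM adapter is asked to format a prompt without a system prompt path."""

    def __init__(
        self,
        name: str = "MissingSystemPromptPathError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ):
        # Message text taken from the InvalidValueError this commit replaces.
        message = "No system prompt path provided."
        super().__init__(message, name, status_code)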

@ -5,7 +5,7 @@ from litellm import acompletion, JSONSchemaValidationError
from cognee.shared.logging_utils import get_logger
from cognee.modules.observability.get_observe import get_observe
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
@ -118,7 +118,7 @@ class GeminiAdapter(LLMInterface):
"""
Format and display the prompt for a user query.
Raises an InvalidValueError if no system prompt is provided.
Raises a MissingSystemPromptPathError if no system prompt is provided.
Parameters:
-----------
@ -135,7 +135,7 @@ class GeminiAdapter(LLMInterface):
if not text_input:
text_input = "No user input provided."
if not system_prompt:
raise InvalidValueError(message="No system prompt path provided.")
raise MissingSystemPromptPathError()
system_prompt = LLMGateway.read_query_prompt(system_prompt)
formatted_prompt = (

@ -38,7 +38,7 @@ def get_llm_client():
This function retrieves the configuration for the LLM provider and model, and
initializes the appropriate LLM client adapter accordingly. It raises an
InvalidValueError if the LLM API key is not set for certain providers or if the provider
LLMAPIKeyNotSetError if the LLM API key is not set for certain providers or if the provider
is unsupported.
Returns:
@ -62,7 +62,7 @@ def get_llm_client():
if provider == LLMProvider.OPENAI:
if llm_config.llm_api_key is None:
raise InvalidValueError(message="LLM API key is not set.")
raise LLMAPIKeyNotSetError()
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import (
OpenAIAdapter,
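A caller-side sketch; the import locations of LLMAPIKeyNotSetError and get_llm_client are assumptions inferred from the surrounding hunk rather than paths shown in this diff.

from cognee.infrastructure.llm.exceptions import LLMAPIKeyNotSetError  # assumed location
from cognee.infrastructure.llm.get_llm_client import get_llm_client    # assumed import path

try:
    client = get_llm_client()
except LLMAPIKeyNotSetError:
    raise SystemExit("LLM_API_KEY is not set; configure it before running cognee.")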

@ -307,7 +307,7 @@ class OpenAIAdapter(LLMInterface):
Format and display the prompt for a user query.
This method formats the prompt using the provided user input and system prompt,
returning a string representation. Raises InvalidValueError if the system prompt is not
returning a string representation. Raises MissingSystemPromptPathError if the system prompt is not
provided.
Parameters:

@ -1,9 +1,8 @@
import numpy as np
import pytest
from cognee.exceptions import InvalidValueError
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node
from cognee.modules.graph.exceptions import InvalidDimensionsError, DimensionOutOfRangeError
def test_node_initialization():
"""Test that a Node is initialized correctly."""
@ -16,7 +15,7 @@ def test_node_initialization():
def test_node_invalid_dimension():
"""Test that initializing a Node with a non-positive dimension raises an error."""
with pytest.raises(InvalidValueError, match="Dimension must be a positive integer"):
with pytest.raises(InvalidDimensionsError, match="Dimensions must be a positive integers"):
Node("node1", dimension=0)
@ -69,7 +68,7 @@ def test_is_node_alive_in_dimension():
def test_node_alive_invalid_dimension():
"""Test that checking alive status with an invalid dimension raises an error."""
node = Node("node1", dimension=1)
with pytest.raises(InvalidValueError, match="Dimension 1 is out of range"):
with pytest.raises(DimensionOutOfRangeError, match="Dimension 1 is out of range"):
node.is_node_alive_in_dimension(1)
@ -106,7 +105,7 @@ def test_edge_invalid_dimension():
"""Test that initializing an Edge with a non-positive dimension raises an error."""
node1 = Node("node1")
node2 = Node("node2")
with pytest.raises(InvalidValueError, match="Dimensions must be a positive integer."):
with pytest.raises(DimensionOutOfRangeError, match="Dimensions must be a positive integer."):
Edge(node1, node2, dimension=0)
@ -125,7 +124,7 @@ def test_edge_alive_invalid_dimension():
node1 = Node("node1")
node2 = Node("node2")
edge = Edge(node1, node2, dimension=1)
with pytest.raises(InvalidValueError, match="Dimension 1 is out of range"):
with pytest.raises(DimensionOutOfRangeError, match="Dimension 1 is out of range"):
edge.is_edge_alive_in_dimension(1)
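InvalidDimensionsError and DimensionOutOfRangeError come from cognee.modules.graph.exceptions, which this commit does not show. A hypothetical sketch of their shape, kept deliberately minimal; the real classes may carry messages and a shared base class like the database exceptions added above.

class InvalidDimensionsError(Exception):
    """Hypothetical: raised when a Node or Edge is constructed with a non-positive dimension count."""


class DimensionOutOfRangeError(Exception):
    """Hypothetical: raised when an alive-check references a dimension index outside the allocated range."""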