new error system
This commit is contained in:
parent
dc03a52541
commit
19e22d14b8
8 changed files with 1106 additions and 137 deletions
|
|
@ -15,6 +15,7 @@ from fastapi.exceptions import RequestValidationError
|
||||||
from fastapi.openapi.utils import get_openapi
|
from fastapi.openapi.utils import get_openapi
|
||||||
|
|
||||||
from cognee.exceptions import CogneeApiError
|
from cognee.exceptions import CogneeApiError
|
||||||
|
from cognee.exceptions.enhanced_exceptions import CogneeBaseError
|
||||||
from cognee.shared.logging_utils import get_logger, setup_logging
|
from cognee.shared.logging_utils import get_logger, setup_logging
|
||||||
from cognee.api.v1.permissions.routers import get_permissions_router
|
from cognee.api.v1.permissions.routers import get_permissions_router
|
||||||
from cognee.api.v1.settings.routers import get_settings_router
|
from cognee.api.v1.settings.routers import get_settings_router
|
||||||
|
|
@ -120,8 +121,27 @@ async def request_validation_exception_handler(request: Request, exc: RequestVal
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.exception_handler(CogneeBaseError)
|
||||||
|
async def enhanced_exception_handler(_: Request, exc: CogneeBaseError) -> JSONResponse:
|
||||||
|
"""
|
||||||
|
Enhanced exception handler for the new exception hierarchy.
|
||||||
|
Provides standardized error responses with rich context and user guidance.
|
||||||
|
"""
|
||||||
|
# Log the full stack trace for debugging
|
||||||
|
logger.error(f"Enhanced exception caught: {exc.__class__.__name__}", exc_info=True)
|
||||||
|
|
||||||
|
# Create standardized error response
|
||||||
|
error_response = {"error": exc.to_dict()}
|
||||||
|
|
||||||
|
return JSONResponse(status_code=exc.status_code, content=error_response)
|
||||||
|
|
||||||
|
|
||||||
@app.exception_handler(CogneeApiError)
|
@app.exception_handler(CogneeApiError)
|
||||||
async def exception_handler(_: Request, exc: CogneeApiError) -> JSONResponse:
|
async def legacy_exception_handler(_: Request, exc: CogneeApiError) -> JSONResponse:
|
||||||
|
"""
|
||||||
|
Legacy exception handler for backward compatibility.
|
||||||
|
Handles old CogneeApiError instances with fallback formatting.
|
||||||
|
"""
|
||||||
detail = {}
|
detail = {}
|
||||||
|
|
||||||
if exc.name and exc.message and exc.status_code:
|
if exc.name and exc.message and exc.status_code:
|
||||||
|
|
@ -136,7 +156,54 @@ async def exception_handler(_: Request, exc: CogneeApiError) -> JSONResponse:
|
||||||
|
|
||||||
# log the stack trace for easier serverside debugging
|
# log the stack trace for easier serverside debugging
|
||||||
logger.error(format_exc())
|
logger.error(format_exc())
|
||||||
return JSONResponse(status_code=status_code, content={"detail": detail["message"]})
|
|
||||||
|
# Convert to new format for consistency
|
||||||
|
error_response = {
|
||||||
|
"error": {
|
||||||
|
"type": exc.__class__.__name__,
|
||||||
|
"message": detail["message"],
|
||||||
|
"technical_message": detail["message"],
|
||||||
|
"suggestions": [
|
||||||
|
"Check the logs for more details",
|
||||||
|
"Try again or contact support if the issue persists",
|
||||||
|
],
|
||||||
|
"docs_link": "https://docs.cognee.ai/troubleshooting",
|
||||||
|
"is_retryable": False,
|
||||||
|
"context": {},
|
||||||
|
"operation": None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return JSONResponse(status_code=status_code, content=error_response)
|
||||||
|
|
||||||
|
|
||||||
|
@app.exception_handler(Exception)
|
||||||
|
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
|
||||||
|
"""
|
||||||
|
Global exception handler for any unhandled exceptions.
|
||||||
|
Ensures all errors return a consistent format.
|
||||||
|
"""
|
||||||
|
logger.error(f"Unhandled exception in {request.url.path}: {str(exc)}", exc_info=True)
|
||||||
|
|
||||||
|
# Create a standardized error response for unexpected errors
|
||||||
|
error_response = {
|
||||||
|
"error": {
|
||||||
|
"type": "UnexpectedError",
|
||||||
|
"message": "An unexpected error occurred. Please try again.",
|
||||||
|
"technical_message": str(exc) if app_environment != "prod" else "Internal server error",
|
||||||
|
"suggestions": [
|
||||||
|
"Try your request again",
|
||||||
|
"Check if the issue persists",
|
||||||
|
"Contact support if the problem continues",
|
||||||
|
],
|
||||||
|
"docs_link": "https://docs.cognee.ai/troubleshooting",
|
||||||
|
"is_retryable": True,
|
||||||
|
"context": {"path": str(request.url.path), "method": request.method},
|
||||||
|
"operation": None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=error_response)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,13 @@ import requests
|
||||||
|
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
from cognee.modules.users.methods import get_authenticated_user
|
from cognee.modules.users.methods import get_authenticated_user
|
||||||
|
from cognee.exceptions import (
|
||||||
|
UnsupportedFileFormatError,
|
||||||
|
FileAccessError,
|
||||||
|
DatasetNotFoundError,
|
||||||
|
CogneeValidationError,
|
||||||
|
CogneeSystemError,
|
||||||
|
)
|
||||||
|
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
@ -49,49 +56,143 @@ def get_add_router() -> APIRouter:
|
||||||
- Any relevant metadata from the ingestion process
|
- Any relevant metadata from the ingestion process
|
||||||
|
|
||||||
## Error Codes
|
## Error Codes
|
||||||
- **400 Bad Request**: Neither datasetId nor datasetName provided
|
- **400 Bad Request**: Missing required parameters or invalid input
|
||||||
- **409 Conflict**: Error during add operation
|
- **422 Unprocessable Entity**: Unsupported file format or validation error
|
||||||
- **403 Forbidden**: User doesn't have permission to add to dataset
|
- **403 Forbidden**: User doesn't have permission to add to dataset
|
||||||
|
- **500 Internal Server Error**: System error during processing
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
- To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
|
- To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
|
||||||
- GitHub repositories are cloned and all files are processed
|
- GitHub repositories are cloned and all files are processed
|
||||||
- HTTP URLs are fetched and their content is processed
|
- HTTP URLs are fetched and their content is processed
|
||||||
- The ALLOW_HTTP_REQUESTS environment variable controls URL processing
|
- The ALLOW_HTTP_REQUESTS environment variable controls URL processing
|
||||||
|
- Enhanced error messages provide specific guidance for fixing issues
|
||||||
"""
|
"""
|
||||||
from cognee.api.v1.add import add as cognee_add
|
from cognee.api.v1.add import add as cognee_add
|
||||||
|
|
||||||
|
# Input validation with enhanced exceptions
|
||||||
if not datasetId and not datasetName:
|
if not datasetId and not datasetName:
|
||||||
raise ValueError("Either datasetId or datasetName must be provided.")
|
raise CogneeValidationError(
|
||||||
|
message="Either datasetId or datasetName must be provided",
|
||||||
|
user_message="You must specify either a dataset name or dataset ID.",
|
||||||
|
suggestions=[
|
||||||
|
"Provide a datasetName parameter (e.g., 'my_dataset')",
|
||||||
|
"Provide a datasetId parameter with a valid UUID",
|
||||||
|
"Check the API documentation for parameter examples",
|
||||||
|
],
|
||||||
|
docs_link="https://docs.cognee.ai/api/add",
|
||||||
|
context={"provided_dataset_name": datasetName, "provided_dataset_id": datasetId},
|
||||||
|
operation="add",
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
if not data or len(data) == 0:
|
||||||
if (
|
raise CogneeValidationError(
|
||||||
isinstance(data, str)
|
message="No data provided for upload",
|
||||||
and data.startswith("http")
|
user_message="You must provide data to add to the dataset.",
|
||||||
and (os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true")
|
suggestions=[
|
||||||
):
|
"Upload one or more files",
|
||||||
if "github" in data:
|
"Provide a valid URL (if URL processing is enabled)",
|
||||||
|
"Check that your request includes the data parameter",
|
||||||
|
],
|
||||||
|
docs_link="https://docs.cognee.ai/guides/adding-data",
|
||||||
|
operation="add",
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Adding {len(data)} items to dataset",
|
||||||
|
extra={
|
||||||
|
"dataset_name": datasetName,
|
||||||
|
"dataset_id": datasetId,
|
||||||
|
"user_id": user.id,
|
||||||
|
"item_count": len(data),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Handle URL-based data (GitHub repos, HTTP URLs)
|
||||||
|
if (
|
||||||
|
len(data) == 1
|
||||||
|
and hasattr(data[0], "filename")
|
||||||
|
and isinstance(data[0].filename, str)
|
||||||
|
and data[0].filename.startswith("http")
|
||||||
|
and (os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true")
|
||||||
|
):
|
||||||
|
url = data[0].filename
|
||||||
|
|
||||||
|
if "github" in url:
|
||||||
|
try:
|
||||||
# Perform git clone if the URL is from GitHub
|
# Perform git clone if the URL is from GitHub
|
||||||
repo_name = data.split("/")[-1].replace(".git", "")
|
repo_name = url.split("/")[-1].replace(".git", "")
|
||||||
subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
|
subprocess.run(["git", "clone", url, f".data/{repo_name}"], check=True)
|
||||||
# TODO: Update add call with dataset info
|
# TODO: Update add call with dataset info
|
||||||
await cognee_add(
|
result = await cognee_add(
|
||||||
"data://.data/",
|
"data://.data/",
|
||||||
f"{repo_name}",
|
f"{repo_name}",
|
||||||
)
|
)
|
||||||
else:
|
except subprocess.CalledProcessError as e:
|
||||||
# Fetch and store the data from other types of URL using curl
|
raise CogneeSystemError(
|
||||||
response = requests.get(data)
|
message=f"Failed to clone GitHub repository: {e}",
|
||||||
|
user_message=f"Could not clone the GitHub repository '{url}'.",
|
||||||
|
suggestions=[
|
||||||
|
"Check if the repository URL is correct",
|
||||||
|
"Verify the repository is public or you have access",
|
||||||
|
"Try cloning the repository manually to test access",
|
||||||
|
],
|
||||||
|
context={"url": url, "repo_name": repo_name, "error": str(e)},
|
||||||
|
operation="add",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
# Fetch and store the data from other types of URL
|
||||||
|
response = requests.get(url, timeout=30)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
file_data = await response.content()
|
file_data = response.content
|
||||||
# TODO: Update add call with dataset info
|
# TODO: Update add call with dataset info
|
||||||
return await cognee_add(file_data)
|
result = await cognee_add(file_data)
|
||||||
else:
|
except requests.RequestException as e:
|
||||||
add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
|
raise CogneeSystemError(
|
||||||
|
message=f"Failed to fetch URL: {e}",
|
||||||
|
user_message=f"Could not fetch content from '{url}'.",
|
||||||
|
suggestions=[
|
||||||
|
"Check if the URL is accessible",
|
||||||
|
"Verify your internet connection",
|
||||||
|
"Try accessing the URL in a browser",
|
||||||
|
"Check if the URL requires authentication",
|
||||||
|
],
|
||||||
|
context={"url": url, "error": str(e)},
|
||||||
|
operation="add",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Handle regular file uploads
|
||||||
|
# Validate file types before processing
|
||||||
|
supported_extensions = [
|
||||||
|
".txt",
|
||||||
|
".pdf",
|
||||||
|
".docx",
|
||||||
|
".md",
|
||||||
|
".csv",
|
||||||
|
".json",
|
||||||
|
".py",
|
||||||
|
".js",
|
||||||
|
".ts",
|
||||||
|
]
|
||||||
|
|
||||||
return add_run.model_dump()
|
for file in data:
|
||||||
except Exception as error:
|
if file.filename:
|
||||||
return JSONResponse(status_code=409, content={"error": str(error)})
|
file_ext = os.path.splitext(file.filename)[1].lower()
|
||||||
|
if file_ext and file_ext not in supported_extensions:
|
||||||
|
raise UnsupportedFileFormatError(
|
||||||
|
file_path=file.filename, supported_formats=supported_extensions
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process the files
|
||||||
|
result = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Successfully added data to dataset",
|
||||||
|
extra={"dataset_name": datasetName, "dataset_id": datasetId, "user_id": user.id},
|
||||||
|
)
|
||||||
|
|
||||||
|
return result.model_dump() if hasattr(result, "model_dump") else result
|
||||||
|
|
||||||
return router
|
return router
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,13 @@ from cognee.modules.pipelines.queues.pipeline_run_info_queues import (
|
||||||
remove_queue,
|
remove_queue,
|
||||||
)
|
)
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
from cognee.exceptions import (
|
||||||
|
CogneeValidationError,
|
||||||
|
EmptyDatasetError,
|
||||||
|
DatasetNotFoundError,
|
||||||
|
MissingAPIKeyError,
|
||||||
|
NoDataToProcessError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
logger = get_logger("api.cognify")
|
logger = get_logger("api.cognify")
|
||||||
|
|
@ -66,8 +73,10 @@ def get_cognify_router() -> APIRouter:
|
||||||
- **Background execution**: Pipeline run metadata including pipeline_run_id for status monitoring via WebSocket subscription
|
- **Background execution**: Pipeline run metadata including pipeline_run_id for status monitoring via WebSocket subscription
|
||||||
|
|
||||||
## Error Codes
|
## Error Codes
|
||||||
- **400 Bad Request**: When neither datasets nor dataset_ids are provided, or when specified datasets don't exist
|
- **400 Bad Request**: Missing required parameters or invalid input
|
||||||
- **409 Conflict**: When processing fails due to system errors, missing LLM API keys, database connection failures, or corrupted content
|
- **422 Unprocessable Entity**: No data to process or validation errors
|
||||||
|
- **404 Not Found**: Specified datasets don't exist
|
||||||
|
- **500 Internal Server Error**: System errors, missing API keys, database connection failures
|
||||||
|
|
||||||
## Example Request
|
## Example Request
|
||||||
```json
|
```json
|
||||||
|
|
@ -84,23 +93,53 @@ def get_cognify_router() -> APIRouter:
|
||||||
## Next Steps
|
## Next Steps
|
||||||
After successful processing, use the search endpoints to query the generated knowledge graph for insights, relationships, and semantic search.
|
After successful processing, use the search endpoints to query the generated knowledge graph for insights, relationships, and semantic search.
|
||||||
"""
|
"""
|
||||||
|
# Input validation with enhanced exceptions
|
||||||
if not payload.datasets and not payload.dataset_ids:
|
if not payload.datasets and not payload.dataset_ids:
|
||||||
return JSONResponse(
|
raise CogneeValidationError(
|
||||||
status_code=400, content={"error": "No datasets or dataset_ids provided"}
|
message="No datasets or dataset_ids provided",
|
||||||
|
user_message="You must specify which datasets to process.",
|
||||||
|
suggestions=[
|
||||||
|
"Provide dataset names using the 'datasets' parameter",
|
||||||
|
"Provide dataset UUIDs using the 'dataset_ids' parameter",
|
||||||
|
"Use cognee.datasets() to see available datasets",
|
||||||
|
],
|
||||||
|
docs_link="https://docs.cognee.ai/api/cognify",
|
||||||
|
context={
|
||||||
|
"provided_datasets": payload.datasets,
|
||||||
|
"provided_dataset_ids": payload.dataset_ids,
|
||||||
|
},
|
||||||
|
operation="cognify",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Check for LLM API key early to provide better error messaging
|
||||||
|
llm_api_key = os.getenv("LLM_API_KEY")
|
||||||
|
if not llm_api_key:
|
||||||
|
raise MissingAPIKeyError(service="LLM", env_var="LLM_API_KEY")
|
||||||
|
|
||||||
from cognee.api.v1.cognify import cognify as cognee_cognify
|
from cognee.api.v1.cognify import cognify as cognee_cognify
|
||||||
|
|
||||||
try:
|
datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
|
||||||
datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
|
|
||||||
|
|
||||||
cognify_run = await cognee_cognify(
|
logger.info(
|
||||||
datasets, user, run_in_background=payload.run_in_background
|
f"Starting cognify process for user {user.id}",
|
||||||
)
|
extra={
|
||||||
|
"user_id": user.id,
|
||||||
|
"datasets": datasets,
|
||||||
|
"run_in_background": payload.run_in_background,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
return cognify_run
|
# The enhanced exception handler will catch and format any errors from cognee_cognify
|
||||||
except Exception as error:
|
cognify_run = await cognee_cognify(
|
||||||
return JSONResponse(status_code=409, content={"error": str(error)})
|
datasets, user, run_in_background=payload.run_in_background
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Cognify process completed for user {user.id}",
|
||||||
|
extra={"user_id": user.id, "datasets": datasets},
|
||||||
|
)
|
||||||
|
|
||||||
|
return cognify_run
|
||||||
|
|
||||||
@router.websocket("/subscribe/{pipeline_run_id}")
|
@router.websocket("/subscribe/{pipeline_run_id}")
|
||||||
async def subscribe_to_cognify_info(websocket: WebSocket, pipeline_run_id: str):
|
async def subscribe_to_cognify_info(websocket: WebSocket, pipeline_run_id: str):
|
||||||
|
|
@ -135,31 +174,43 @@ def get_cognify_router() -> APIRouter:
|
||||||
|
|
||||||
initialize_queue(pipeline_run_id)
|
initialize_queue(pipeline_run_id)
|
||||||
|
|
||||||
while True:
|
try:
|
||||||
pipeline_run_info = get_from_queue(pipeline_run_id)
|
# If the pipeline is already completed, send the completion status
|
||||||
|
if isinstance(pipeline_run, PipelineRunCompleted):
|
||||||
|
graph_data = await get_formatted_graph_data()
|
||||||
|
pipeline_run.payload = {
|
||||||
|
"nodes": graph_data.get("nodes", []),
|
||||||
|
"edges": graph_data.get("edges", []),
|
||||||
|
}
|
||||||
|
|
||||||
if not pipeline_run_info:
|
await websocket.send_json(pipeline_run.model_dump())
|
||||||
await asyncio.sleep(2)
|
await websocket.close(code=WS_1000_NORMAL_CLOSURE)
|
||||||
continue
|
return
|
||||||
|
|
||||||
if not isinstance(pipeline_run_info, PipelineRunInfo):
|
# Stream pipeline updates
|
||||||
continue
|
while True:
|
||||||
|
try:
|
||||||
|
pipeline_run_info = await asyncio.wait_for(
|
||||||
|
get_from_queue(pipeline_run_id), timeout=10.0
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
if pipeline_run_info:
|
||||||
await websocket.send_json(
|
await websocket.send_json(pipeline_run_info.model_dump())
|
||||||
{
|
|
||||||
"pipeline_run_id": str(pipeline_run_info.pipeline_run_id),
|
|
||||||
"status": pipeline_run_info.status,
|
|
||||||
"payload": await get_formatted_graph_data(pipeline_run.dataset_id, user.id),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
if isinstance(pipeline_run_info, PipelineRunCompleted):
|
if isinstance(pipeline_run_info, PipelineRunCompleted):
|
||||||
remove_queue(pipeline_run_id)
|
break
|
||||||
await websocket.close(code=WS_1000_NORMAL_CLOSURE)
|
except asyncio.TimeoutError:
|
||||||
|
# Send a heartbeat to keep the connection alive
|
||||||
|
await websocket.send_json({"type": "heartbeat"})
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in WebSocket communication: {str(e)}")
|
||||||
break
|
break
|
||||||
except WebSocketDisconnect:
|
|
||||||
remove_queue(pipeline_run_id)
|
except WebSocketDisconnect:
|
||||||
break
|
logger.info(f"WebSocket disconnected for pipeline {pipeline_run_id}")
|
||||||
|
except Exception as error:
|
||||||
|
logger.error(f"WebSocket error: {str(error)}")
|
||||||
|
finally:
|
||||||
|
remove_queue(pipeline_run_id)
|
||||||
|
|
||||||
return router
|
return router
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,21 @@
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from typing import Optional
|
from typing import List, Optional
|
||||||
from datetime import datetime
|
from fastapi import APIRouter, Depends
|
||||||
from fastapi import Depends, APIRouter
|
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
from cognee.api.DTO import InDTO
|
||||||
from cognee.modules.search.types import SearchType
|
from cognee.modules.search.types import SearchType
|
||||||
from cognee.api.DTO import InDTO, OutDTO
|
|
||||||
from cognee.modules.users.exceptions.exceptions import PermissionDeniedError
|
|
||||||
from cognee.modules.users.models import User
|
|
||||||
from cognee.modules.search.operations import get_history
|
|
||||||
from cognee.modules.users.methods import get_authenticated_user
|
from cognee.modules.users.methods import get_authenticated_user
|
||||||
|
from cognee.modules.users.models import User
|
||||||
|
from cognee.modules.users.exceptions import PermissionDeniedError
|
||||||
|
from cognee.modules.data.methods import get_history
|
||||||
|
from cognee.exceptions import UnsupportedSearchTypeError, InvalidQueryError, NoDataToProcessError
|
||||||
|
|
||||||
|
|
||||||
# Note: Datasets sent by name will only map to datasets owned by the request sender
|
|
||||||
# To search for datasets not owned by the request sender dataset UUID is needed
|
|
||||||
class SearchPayloadDTO(InDTO):
|
class SearchPayloadDTO(InDTO):
|
||||||
search_type: SearchType
|
search_type: SearchType
|
||||||
datasets: Optional[list[str]] = None
|
datasets: Optional[List[str]] = None
|
||||||
dataset_ids: Optional[list[UUID]] = None
|
dataset_ids: Optional[List[UUID]] = None
|
||||||
query: str
|
query: str
|
||||||
top_k: Optional[int] = 10
|
top_k: Optional[int] = 10
|
||||||
|
|
||||||
|
|
@ -24,36 +23,23 @@ class SearchPayloadDTO(InDTO):
|
||||||
def get_search_router() -> APIRouter:
|
def get_search_router() -> APIRouter:
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
class SearchHistoryItem(OutDTO):
|
@router.get("/history", response_model=list)
|
||||||
id: UUID
|
|
||||||
text: str
|
|
||||||
user: str
|
|
||||||
created_at: datetime
|
|
||||||
|
|
||||||
@router.get("", response_model=list[SearchHistoryItem])
|
|
||||||
async def get_search_history(user: User = Depends(get_authenticated_user)):
|
async def get_search_history(user: User = Depends(get_authenticated_user)):
|
||||||
"""
|
"""
|
||||||
Get search history for the authenticated user.
|
Get search history for the authenticated user.
|
||||||
|
|
||||||
This endpoint retrieves the search history for the authenticated user,
|
This endpoint retrieves the search history for the current user,
|
||||||
returning a list of previously executed searches with their timestamps.
|
showing previous queries and their results.
|
||||||
|
|
||||||
## Response
|
## Response
|
||||||
Returns a list of search history items containing:
|
Returns a list of historical search queries and their metadata.
|
||||||
- **id**: Unique identifier for the search
|
|
||||||
- **text**: The search query text
|
|
||||||
- **user**: User who performed the search
|
|
||||||
- **created_at**: When the search was performed
|
|
||||||
|
|
||||||
## Error Codes
|
## Error Codes
|
||||||
- **500 Internal Server Error**: Error retrieving search history
|
- **500 Internal Server Error**: Database or system error while retrieving history
|
||||||
"""
|
"""
|
||||||
try:
|
# Remove try-catch to let enhanced exception handler deal with it
|
||||||
history = await get_history(user.id, limit=0)
|
history = await get_history(user.id, limit=0)
|
||||||
|
return history
|
||||||
return history
|
|
||||||
except Exception as error:
|
|
||||||
return JSONResponse(status_code=500, content={"error": str(error)})
|
|
||||||
|
|
||||||
@router.post("", response_model=list)
|
@router.post("", response_model=list)
|
||||||
async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
|
async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
|
||||||
|
|
@ -75,30 +61,51 @@ def get_search_router() -> APIRouter:
|
||||||
Returns a list of search results containing relevant nodes from the graph.
|
Returns a list of search results containing relevant nodes from the graph.
|
||||||
|
|
||||||
## Error Codes
|
## Error Codes
|
||||||
- **409 Conflict**: Error during search operation
|
- **400 Bad Request**: Invalid query or search parameters
|
||||||
- **403 Forbidden**: User doesn't have permission to search datasets (returns empty list)
|
- **404 Not Found**: No data found to search
|
||||||
|
- **422 Unprocessable Entity**: Unsupported search type
|
||||||
|
- **403 Forbidden**: User doesn't have permission to search datasets
|
||||||
|
- **500 Internal Server Error**: System error during search
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
- Datasets sent by name will only map to datasets owned by the request sender
|
- Datasets sent by name will only map to datasets owned by the request sender
|
||||||
- To search datasets not owned by the request sender, dataset UUID is needed
|
- To search datasets not owned by the request sender, dataset UUID is needed
|
||||||
- If permission is denied, returns empty list instead of error
|
- Enhanced error messages provide actionable suggestions for fixing issues
|
||||||
"""
|
"""
|
||||||
from cognee.api.v1.search import search as cognee_search
|
from cognee.api.v1.search import search as cognee_search
|
||||||
|
|
||||||
try:
|
# Input validation with enhanced exceptions
|
||||||
results = await cognee_search(
|
if not payload.query or not payload.query.strip():
|
||||||
query_text=payload.query,
|
raise InvalidQueryError(query=payload.query or "", reason="Query cannot be empty")
|
||||||
query_type=payload.search_type,
|
|
||||||
user=user,
|
if len(payload.query.strip()) < 2:
|
||||||
datasets=payload.datasets,
|
raise InvalidQueryError(
|
||||||
dataset_ids=payload.dataset_ids,
|
query=payload.query, reason="Query must be at least 2 characters long"
|
||||||
top_k=payload.top_k,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return results
|
# Check if search type is supported
|
||||||
except PermissionDeniedError:
|
try:
|
||||||
|
search_type = payload.search_type
|
||||||
|
except ValueError:
|
||||||
|
raise UnsupportedSearchTypeError(
|
||||||
|
search_type=str(payload.search_type), supported_types=[t.value for t in SearchType]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Permission denied errors will be caught and handled by the enhanced exception handler
|
||||||
|
# Other exceptions will also be properly formatted by the global handler
|
||||||
|
results = await cognee_search(
|
||||||
|
query_text=payload.query,
|
||||||
|
query_type=payload.search_type,
|
||||||
|
user=user,
|
||||||
|
datasets=payload.datasets,
|
||||||
|
dataset_ids=payload.dataset_ids,
|
||||||
|
top_k=payload.top_k,
|
||||||
|
)
|
||||||
|
|
||||||
|
# If no results found, that's not necessarily an error, just return empty list
|
||||||
|
if not results:
|
||||||
return []
|
return []
|
||||||
except Exception as error:
|
|
||||||
return JSONResponse(status_code=409, content={"error": str(error)})
|
return results
|
||||||
|
|
||||||
return router
|
return router
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,11 @@
|
||||||
"""
|
"""
|
||||||
Custom exceptions for the Cognee API.
|
Custom exceptions for the Cognee API.
|
||||||
|
|
||||||
This module defines a set of exceptions for handling various application errors,
|
This module defines a comprehensive set of exceptions for handling various application errors,
|
||||||
such as service failures, resource conflicts, and invalid operations.
|
with enhanced error context, user-friendly messages, and actionable suggestions.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Import original exceptions for backward compatibility
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
CogneeApiError,
|
CogneeApiError,
|
||||||
ServiceError,
|
ServiceError,
|
||||||
|
|
@ -12,3 +13,83 @@ from .exceptions import (
|
||||||
InvalidAttributeError,
|
InvalidAttributeError,
|
||||||
CriticalError,
|
CriticalError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Import enhanced exception hierarchy
|
||||||
|
from .enhanced_exceptions import (
|
||||||
|
CogneeBaseError,
|
||||||
|
CogneeUserError,
|
||||||
|
CogneeSystemError,
|
||||||
|
CogneeTransientError,
|
||||||
|
CogneeConfigurationError,
|
||||||
|
CogneeValidationError,
|
||||||
|
CogneeAuthenticationError,
|
||||||
|
CogneePermissionError,
|
||||||
|
CogneeNotFoundError,
|
||||||
|
CogneeRateLimitError,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Import domain-specific exceptions
|
||||||
|
from .domain_exceptions import (
|
||||||
|
# Data/Input Errors
|
||||||
|
UnsupportedFileFormatError,
|
||||||
|
EmptyDatasetError,
|
||||||
|
DatasetNotFoundError,
|
||||||
|
InvalidQueryError,
|
||||||
|
FileAccessError,
|
||||||
|
# Processing Errors
|
||||||
|
LLMConnectionError,
|
||||||
|
LLMRateLimitError,
|
||||||
|
ProcessingTimeoutError,
|
||||||
|
DatabaseConnectionError,
|
||||||
|
InsufficientResourcesError,
|
||||||
|
# Configuration Errors
|
||||||
|
MissingAPIKeyError,
|
||||||
|
InvalidDatabaseConfigError,
|
||||||
|
UnsupportedSearchTypeError,
|
||||||
|
# Pipeline Errors
|
||||||
|
PipelineExecutionError,
|
||||||
|
DataExtractionError,
|
||||||
|
NoDataToProcessError,
|
||||||
|
)
|
||||||
|
|
||||||
|
# For backward compatibility, create aliases
|
||||||
|
# These will allow existing code to continue working while we migrate
|
||||||
|
DatasetNotFoundError_Legacy = InvalidValueError # For existing dataset not found errors
|
||||||
|
PermissionDeniedError_Legacy = CogneeApiError # For existing permission errors
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
# Original exceptions (backward compatibility)
|
||||||
|
"CogneeApiError",
|
||||||
|
"ServiceError",
|
||||||
|
"InvalidValueError",
|
||||||
|
"InvalidAttributeError",
|
||||||
|
"CriticalError",
|
||||||
|
# Enhanced base exceptions
|
||||||
|
"CogneeBaseError",
|
||||||
|
"CogneeUserError",
|
||||||
|
"CogneeSystemError",
|
||||||
|
"CogneeTransientError",
|
||||||
|
"CogneeConfigurationError",
|
||||||
|
"CogneeValidationError",
|
||||||
|
"CogneeAuthenticationError",
|
||||||
|
"CogneePermissionError",
|
||||||
|
"CogneeNotFoundError",
|
||||||
|
"CogneeRateLimitError",
|
||||||
|
# Domain-specific exceptions
|
||||||
|
"UnsupportedFileFormatError",
|
||||||
|
"EmptyDatasetError",
|
||||||
|
"DatasetNotFoundError",
|
||||||
|
"InvalidQueryError",
|
||||||
|
"FileAccessError",
|
||||||
|
"LLMConnectionError",
|
||||||
|
"LLMRateLimitError",
|
||||||
|
"ProcessingTimeoutError",
|
||||||
|
"DatabaseConnectionError",
|
||||||
|
"InsufficientResourcesError",
|
||||||
|
"MissingAPIKeyError",
|
||||||
|
"InvalidDatabaseConfigError",
|
||||||
|
"UnsupportedSearchTypeError",
|
||||||
|
"PipelineExecutionError",
|
||||||
|
"DataExtractionError",
|
||||||
|
"NoDataToProcessError",
|
||||||
|
]
|
||||||
|
|
|
||||||
337
cognee/exceptions/domain_exceptions.py
Normal file
337
cognee/exceptions/domain_exceptions.py
Normal file
|
|
@ -0,0 +1,337 @@
|
||||||
|
from typing import List, Optional, Dict, Any
|
||||||
|
from .enhanced_exceptions import (
|
||||||
|
CogneeUserError,
|
||||||
|
CogneeSystemError,
|
||||||
|
CogneeTransientError,
|
||||||
|
CogneeConfigurationError,
|
||||||
|
CogneeValidationError,
|
||||||
|
CogneeNotFoundError,
|
||||||
|
CogneePermissionError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ========== DATA/INPUT ERRORS (User-fixable) ==========
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedFileFormatError(CogneeValidationError):
    """
    Raised when a file's format is not among the formats Cognee can ingest.

    Carries the offending path and the accepted formats so the API layer can
    surface a concrete, actionable error to the caller.
    """

    def __init__(self, file_path: str, supported_formats: List[str], **kwargs):
        # Spell out the accepted formats directly in the first suggestion.
        format_hint = ", ".join(supported_formats)
        super().__init__(
            message=f"File format not supported: {file_path}",
            user_message=f"The file '{file_path}' has an unsupported format.",
            suggestions=[
                f"Use one of these supported formats: {format_hint}",
                "Convert your file to a supported format",
                "Check our documentation for the complete list of supported formats",
            ],
            docs_link="https://docs.cognee.ai/guides/file-formats",
            context={"file_path": file_path, "supported_formats": supported_formats},
            operation="add",
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class EmptyDatasetError(CogneeValidationError):
    """
    Raised when a cognify run targets a dataset that holds no processable data.
    """

    def __init__(self, dataset_name: str, **kwargs):
        remedies = [
            "Add some data to the dataset first using cognee.add()",
            "Check if your files contain readable text content",
            "Verify that your data was uploaded successfully",
        ]
        super().__init__(
            message=f"Dataset '{dataset_name}' is empty",
            user_message=f"The dataset '{dataset_name}' contains no data to process.",
            suggestions=remedies,
            docs_link="https://docs.cognee.ai/guides/adding-data",
            context={"dataset_name": dataset_name},
            operation="cognify",
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class DatasetNotFoundError(CogneeNotFoundError):
    """
    Raised when a dataset lookup fails.

    If the caller knows which datasets exist, pass ``available_datasets`` so
    the suggestions can enumerate them; otherwise the user is told to create
    the dataset first.
    """

    def __init__(
        self, dataset_identifier: str, available_datasets: Optional[List[str]] = None, **kwargs
    ):
        # Suggestion tail depends on whether we can list alternatives.
        tail = (
            [
                f"Available datasets: {', '.join(available_datasets)}",
                "Use cognee.datasets() to see all your datasets",
            ]
            if available_datasets
            else ["Create the dataset first by adding data to it"]
        )
        super().__init__(
            message=f"Dataset not found: {dataset_identifier}",
            user_message=f"Could not find dataset '{dataset_identifier}'.",
            suggestions=["Check the dataset name for typos"] + tail,
            docs_link="https://docs.cognee.ai/guides/datasets",
            context={
                "dataset_identifier": dataset_identifier,
                "available_datasets": available_datasets,
            },
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidQueryError(CogneeValidationError):
    """
    Raised when a search query cannot be processed; ``reason`` explains why.
    """

    def __init__(self, query: str, reason: str, **kwargs):
        super().__init__(
            message=f"Invalid query: {reason}",
            user_message=f"Your search query '{query}' is invalid: {reason}",
            suggestions=[
                "Try rephrasing your query",
                "Use simpler, more specific terms",
                "Check our query examples in the documentation",
            ],
            docs_link="https://docs.cognee.ai/guides/search",
            context={"query": query, "reason": reason},
            operation="search",
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class FileAccessError(CogneeUserError):
    """
    Raised when a file handed to the add pipeline cannot be opened or read.
    """

    def __init__(self, file_path: str, reason: str, **kwargs):
        remedies = [
            "Check if the file exists at the specified path",
            "Verify you have read permissions for the file",
            "Ensure the file is not locked by another application",
        ]
        super().__init__(
            message=f"Cannot access file: {file_path} - {reason}",
            user_message=f"Unable to read the file '{file_path}': {reason}",
            suggestions=remedies,
            context={"file_path": file_path, "reason": reason},
            operation="add",
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# ========== PROCESSING ERRORS (System/LLM errors) ==========
|
||||||
|
|
||||||
|
|
||||||
|
class LLMConnectionError(CogneeTransientError):
    """
    Raised when the configured LLM provider cannot be reached.

    Transient by inheritance: callers may retry.
    """

    def __init__(self, provider: str, model: str, reason: str, **kwargs):
        super().__init__(
            message=f"LLM connection failed: {provider}/{model} - {reason}",
            user_message=f"Cannot connect to the {provider} language model service.",
            suggestions=[
                "Check your internet connection",
                "Verify your API key is correct and has sufficient credits",
                "Try again in a few moments",
                "Check the service status page",
            ],
            docs_link="https://docs.cognee.ai/troubleshooting/llm-connection",
            context={"provider": provider, "model": model, "reason": reason},
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class LLMRateLimitError(CogneeTransientError):
    """
    Raised when the LLM provider rejects a request for exceeding rate limits.

    When the provider reports a ``retry_after`` interval (seconds), it is
    surfaced as the first suggestion.
    """

    def __init__(self, provider: str, retry_after: Optional[int] = None, **kwargs):
        tips = [
            "Wait a moment before retrying",
            "Consider upgrading your API plan",
            "Use smaller batch sizes to reduce token usage",
        ]
        if retry_after:
            tips = [f"Wait {retry_after} seconds before retrying"] + tips
        super().__init__(
            message=f"Rate limit exceeded for {provider}",
            user_message=f"You've exceeded the rate limit for {provider}.",
            suggestions=tips,
            context={"provider": provider, "retry_after": retry_after},
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class ProcessingTimeoutError(CogneeTransientError):
    """
    Raised when an operation exceeds its time budget and is cancelled.
    """

    def __init__(self, operation: str, timeout_seconds: int, **kwargs):
        super().__init__(
            message=f"Operation '{operation}' timed out after {timeout_seconds}s",
            user_message=f"The {operation} operation took too long and was cancelled.",
            suggestions=[
                "Try processing smaller amounts of data at a time",
                "Check your internet connection stability",
                "Retry the operation",
                "Use background processing for large datasets",
            ],
            context={"operation": operation, "timeout_seconds": timeout_seconds},
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class DatabaseConnectionError(CogneeSystemError):
    """
    Raised when a backing database (relational, vector, or graph) is
    unreachable; ``db_type`` identifies which one.
    """

    def __init__(self, db_type: str, reason: str, **kwargs):
        remedies = [
            "Check if the database service is running",
            "Verify database connection configuration",
            "Check network connectivity",
            "Contact support if the issue persists",
        ]
        super().__init__(
            message=f"{db_type} database connection failed: {reason}",
            user_message=f"Cannot connect to the {db_type} database.",
            suggestions=remedies,
            docs_link="https://docs.cognee.ai/troubleshooting/database",
            context={"db_type": db_type, "reason": reason},
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class InsufficientResourcesError(CogneeSystemError):
    """
    Raised when the host lacks the resources (memory, disk, etc.) required
    to complete an operation. ``required`` and ``available`` are free-form
    human-readable amounts.
    """

    def __init__(self, resource_type: str, required: str, available: str, **kwargs):
        super().__init__(
            message=f"Insufficient {resource_type}: need {required}, have {available}",
            user_message=f"Not enough {resource_type} available to complete this operation.",
            suggestions=[
                "Try processing smaller amounts of data",
                "Free up system resources",
                "Wait for other operations to complete",
                "Consider upgrading your system resources",
            ],
            context={"resource_type": resource_type, "required": required, "available": available},
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# ========== CONFIGURATION ERRORS ==========
|
||||||
|
|
||||||
|
|
||||||
|
class MissingAPIKeyError(CogneeConfigurationError):
    """
    Raised when the API key for an external service is not configured;
    ``env_var`` names the environment variable the user must set.
    """

    def __init__(self, service: str, env_var: str, **kwargs):
        remedies = [
            f"Set the {env_var} environment variable",
            f"Add your {service} API key to your .env file",
            "Check the setup documentation for detailed instructions",
        ]
        super().__init__(
            message=f"Missing API key for {service}",
            user_message=f"API key for {service} is not configured.",
            suggestions=remedies,
            docs_link="https://docs.cognee.ai/setup/api-keys",
            context={"service": service, "env_var": env_var},
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidDatabaseConfigError(CogneeConfigurationError):
    """
    Raised when the configuration for a database backend is malformed or
    incomplete; ``config_issue`` describes the specific problem.
    """

    def __init__(self, db_type: str, config_issue: str, **kwargs):
        super().__init__(
            message=f"Invalid {db_type} database configuration: {config_issue}",
            user_message=f"The {db_type} database is not properly configured: {config_issue}",
            suggestions=[
                "Check your database configuration settings",
                "Verify connection strings and credentials",
                "Review the database setup documentation",
                "Ensure the database server is accessible",
            ],
            docs_link="https://docs.cognee.ai/setup/databases",
            context={"db_type": db_type, "config_issue": config_issue},
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedSearchTypeError(CogneeValidationError):
    """
    Raised when a search request names a search type Cognee does not offer.
    """

    def __init__(self, search_type: str, supported_types: List[str], **kwargs):
        type_hint = ", ".join(supported_types)
        super().__init__(
            message=f"Unsupported search type: {search_type}",
            user_message=f"The search type '{search_type}' is not supported.",
            suggestions=[
                f"Use one of these supported search types: {type_hint}",
                "Check the search documentation for available types",
                "Try using GRAPH_COMPLETION for general queries",
            ],
            docs_link="https://docs.cognee.ai/guides/search-types",
            context={"search_type": search_type, "supported_types": supported_types},
            operation="search",
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# ========== PIPELINE ERRORS ==========
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineExecutionError(CogneeSystemError):
    """
    Raised when a pipeline task fails.

    Parameters
    ----------
    pipeline_name: name of the pipeline that was running.
    task_name: name of the task that failed.
    error_details: technical description of the underlying failure.
    **kwargs: forwarded to ``CogneeSystemError``; a caller-supplied
        ``context`` dict is merged into the default context instead of
        replacing it.
    """

    def __init__(self, pipeline_name: str, task_name: str, error_details: str, **kwargs):
        # BUG FIX: previously `context=` was always passed to super() while
        # **kwargs was forwarded as well. Callers such as the pipeline task
        # runner raise this error WITH their own `context={...}` keyword,
        # which made super().__init__ receive `context` twice and crash with
        # "got multiple values for keyword argument 'context'". Pop the
        # caller's context out of kwargs and merge it instead.
        context = {
            "pipeline_name": pipeline_name,
            "task_name": task_name,
            "error_details": error_details,
        }
        context.update(kwargs.pop("context", None) or {})
        super().__init__(
            message=f"Pipeline '{pipeline_name}' failed at task '{task_name}': {error_details}",
            user_message=f"Processing failed during the {task_name} step.",
            suggestions=[
                "Check the logs for more detailed error information",
                "Verify your data is in a supported format",
                "Try processing smaller amounts of data",
                "Contact support if the issue persists",
            ],
            context=context,
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class DataExtractionError(CogneeSystemError):
    """
    Raised when no readable content could be extracted from an input source.
    """

    def __init__(self, source: str, reason: str, **kwargs):
        remedies = [
            "Verify the file is not corrupted",
            "Try converting to a different format",
            "Check if the file contains readable text",
            "Use a supported file format",
        ]
        super().__init__(
            message=f"Data extraction failed for {source}: {reason}",
            user_message=f"Could not extract readable content from '{source}'.",
            suggestions=remedies,
            context={"source": source, "reason": reason},
            operation="add",
            **kwargs,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class NoDataToProcessError(CogneeValidationError):
    """
    Raised when an operation is invoked but there is nothing for it to work on.
    """

    def __init__(self, operation: str, **kwargs):
        super().__init__(
            message=f"No data available for {operation}",
            user_message=f"There's no data to process for the {operation} operation.",
            suggestions=[
                "Add some data first using cognee.add()",
                "Check if your previous data upload was successful",
                "Verify the dataset contains processable content",
            ],
            docs_link="https://docs.cognee.ai/guides/adding-data",
            context={"operation": operation},
            **kwargs,
        )
|
||||||
184
cognee/exceptions/enhanced_exceptions.py
Normal file
184
cognee/exceptions/enhanced_exceptions.py
Normal file
|
|
@ -0,0 +1,184 @@
|
||||||
|
from typing import Dict, List, Optional, Any
|
||||||
|
from fastapi import status
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
class CogneeBaseError(Exception):
    """
    Root of the Cognee exception hierarchy, carrying rich error context.

    Beyond the technical ``message``, every instance holds a user-facing
    message, actionable suggestions, an optional documentation link, a
    free-form context dict, a retryability flag, an HTTP status code, and
    the name of the operation that failed. Constructing an instance also
    emits a log entry at the requested level (ERROR / WARNING / INFO).
    """

    def __init__(
        self,
        message: str,
        user_message: Optional[str] = None,
        suggestions: Optional[List[str]] = None,
        docs_link: Optional[str] = None,
        context: Optional[Dict[str, Any]] = None,
        is_retryable: bool = False,
        status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
        log_level: str = "ERROR",
        operation: Optional[str] = None,
    ):
        self.message = message
        self.user_message = user_message or message
        self.suggestions = suggestions or []
        self.docs_link = docs_link
        self.context = context or {}
        self.is_retryable = is_retryable
        self.status_code = status_code
        self.operation = operation

        # Log on construction. Only these three levels emit; any other
        # log_level value is silently ignored (same as the original chain).
        emitters = {
            "ERROR": (logger.error, "CogneeError"),
            "WARNING": (logger.warning, "CogneeWarning"),
            "INFO": (logger.info, "CogneeInfo"),
        }
        entry = emitters.get(log_level)
        if entry is not None:
            emit, prefix = entry
            emit(f"{prefix} in {operation or 'unknown'}: {message}", extra=self.context)

        super().__init__(self.message)

    def __str__(self):
        return f"{self.__class__.__name__}: {self.message}"

    def to_dict(self) -> Dict[str, Any]:
        """Convert exception to dictionary for API responses"""
        return {
            "type": self.__class__.__name__,
            "message": self.user_message,
            "technical_message": self.message,
            "suggestions": self.suggestions,
            "docs_link": self.docs_link,
            "is_retryable": self.is_retryable,
            "context": self.context,
            "operation": self.operation,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class CogneeUserError(CogneeBaseError):
    """
    Errors the user can correct themselves (4xx territory).

    Examples: invalid file format, missing required field. Defaults to
    HTTP 400 and WARNING-level logging; both may be overridden by callers.
    """

    def __init__(self, *args, **kwargs):
        defaults = {
            "status_code": status.HTTP_400_BAD_REQUEST,
            "log_level": "WARNING",
        }
        for key, value in defaults.items():
            kwargs.setdefault(key, value)
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CogneeSystemError(CogneeBaseError):
    """
    System/infrastructure failures requiring technical intervention (5xx).

    Examples: database connection failure, service unavailable. Defaults to
    HTTP 500 and ERROR-level logging; both may be overridden by callers.
    """

    def __init__(self, *args, **kwargs):
        defaults = {
            "status_code": status.HTTP_500_INTERNAL_SERVER_ERROR,
            "log_level": "ERROR",
        }
        for key, value in defaults.items():
            kwargs.setdefault(key, value)
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CogneeTransientError(CogneeBaseError):
    """
    Temporary failures that may succeed on retry (network blips, throttling,
    short-lived resource pressure).

    Defaults to HTTP 503, ``is_retryable=True`` and WARNING-level logging;
    callers may override any of these.
    """

    def __init__(self, *args, **kwargs):
        defaults = {
            "status_code": status.HTTP_503_SERVICE_UNAVAILABLE,
            "is_retryable": True,
            "log_level": "WARNING",
        }
        for key, value in defaults.items():
            kwargs.setdefault(key, value)
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CogneeConfigurationError(CogneeBaseError):
    """
    Missing or invalid setup/configuration that prevents normal operation.

    Defaults to HTTP 422 and ERROR-level logging; both may be overridden.
    """

    def __init__(self, *args, **kwargs):
        defaults = {
            "status_code": status.HTTP_422_UNPROCESSABLE_ENTITY,
            "log_level": "ERROR",
        }
        for key, value in defaults.items():
            kwargs.setdefault(key, value)
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CogneeValidationError(CogneeUserError):
    """
    Input validation failure — a user error carrying HTTP 422 by default
    (inherits WARNING-level logging from CogneeUserError).
    """

    def __init__(self, *args, **kwargs):
        kwargs.setdefault("status_code", status.HTTP_422_UNPROCESSABLE_ENTITY)
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CogneeAuthenticationError(CogneeUserError):
    """
    Authentication/authorization failure; defaults to HTTP 401.
    """

    def __init__(self, *args, **kwargs):
        kwargs.setdefault("status_code", status.HTTP_401_UNAUTHORIZED)
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CogneePermissionError(CogneeUserError):
    """
    Permission denied on a resource the user may not touch; defaults to
    HTTP 403.
    """

    def __init__(self, *args, **kwargs):
        kwargs.setdefault("status_code", status.HTTP_403_FORBIDDEN)
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CogneeNotFoundError(CogneeUserError):
    """
    Requested resource does not exist; defaults to HTTP 404.
    """

    def __init__(self, *args, **kwargs):
        kwargs.setdefault("status_code", status.HTTP_404_NOT_FOUND)
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CogneeRateLimitError(CogneeTransientError):
    """
    Rate limiting hit; defaults to HTTP 429 with generic back-off advice
    (callers may supply their own ``suggestions`` to replace the default).
    Retryability is inherited from CogneeTransientError.
    """

    def __init__(self, *args, **kwargs):
        defaults = {
            "status_code": status.HTTP_429_TOO_MANY_REQUESTS,
            "suggestions": [
                "Wait a moment before retrying",
                "Check your API rate limits",
                "Consider using smaller batch sizes",
            ],
        }
        for key, value in defaults.items():
            kwargs.setdefault(key, value)
        super().__init__(*args, **kwargs)
|
||||||
|
|
@ -2,6 +2,14 @@ import inspect
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
from cognee.shared.utils import send_telemetry
|
from cognee.shared.utils import send_telemetry
|
||||||
|
from cognee.exceptions import (
|
||||||
|
PipelineExecutionError,
|
||||||
|
CogneeTransientError,
|
||||||
|
CogneeSystemError,
|
||||||
|
CogneeUserError,
|
||||||
|
LLMConnectionError,
|
||||||
|
DatabaseConnectionError,
|
||||||
|
)
|
||||||
|
|
||||||
from ..tasks.task import Task
|
from ..tasks.task import Task
|
||||||
|
|
||||||
|
|
@ -16,15 +24,33 @@ async def handle_task(
|
||||||
user: User,
|
user: User,
|
||||||
context: dict = None,
|
context: dict = None,
|
||||||
):
|
):
|
||||||
"""Handle common task workflow with logging, telemetry, and error handling around the core execution logic."""
|
"""
|
||||||
task_type = running_task.task_type
|
Handle common task workflow with enhanced error handling and recovery strategies.
|
||||||
|
|
||||||
|
This function provides comprehensive error handling for pipeline tasks with:
|
||||||
|
- Context-aware error reporting
|
||||||
|
- Automatic retry for transient errors
|
||||||
|
- Detailed error logging and telemetry
|
||||||
|
- User-friendly error messages
|
||||||
|
"""
|
||||||
|
task_type = running_task.task_type
|
||||||
|
task_name = running_task.executable.__name__
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"{task_type} task started: `{task_name}`",
|
||||||
|
extra={
|
||||||
|
"task_type": task_type,
|
||||||
|
"task_name": task_name,
|
||||||
|
"user_id": user.id,
|
||||||
|
"context": context,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(f"{task_type} task started: `{running_task.executable.__name__}`")
|
|
||||||
send_telemetry(
|
send_telemetry(
|
||||||
f"{task_type} Task Started",
|
f"{task_type} Task Started",
|
||||||
user_id=user.id,
|
user_id=user.id,
|
||||||
additional_properties={
|
additional_properties={
|
||||||
"task_name": running_task.executable.__name__,
|
"task_name": task_name,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -35,36 +61,151 @@ async def handle_task(
|
||||||
if has_context:
|
if has_context:
|
||||||
args.append(context)
|
args.append(context)
|
||||||
|
|
||||||
try:
|
# Retry configuration for transient errors
|
||||||
async for result_data in running_task.execute(args, next_task_batch_size):
|
max_retries = 3
|
||||||
async for result in run_tasks_base(leftover_tasks, result_data, user, context):
|
retry_count = 0
|
||||||
yield result
|
|
||||||
|
|
||||||
logger.info(f"{task_type} task completed: `{running_task.executable.__name__}`")
|
while retry_count <= max_retries:
|
||||||
send_telemetry(
|
try:
|
||||||
f"{task_type} Task Completed",
|
async for result_data in running_task.execute(args, next_task_batch_size):
|
||||||
user_id=user.id,
|
async for result in run_tasks_base(leftover_tasks, result_data, user, context):
|
||||||
additional_properties={
|
yield result
|
||||||
"task_name": running_task.executable.__name__,
|
|
||||||
},
|
logger.info(
|
||||||
)
|
f"{task_type} task completed: `{task_name}`",
|
||||||
except Exception as error:
|
extra={
|
||||||
logger.error(
|
"task_type": task_type,
|
||||||
f"{task_type} task errored: `{running_task.executable.__name__}`\n{str(error)}\n",
|
"task_name": task_name,
|
||||||
exc_info=True,
|
"user_id": user.id,
|
||||||
)
|
"retry_count": retry_count,
|
||||||
send_telemetry(
|
},
|
||||||
f"{task_type} Task Errored",
|
)
|
||||||
user_id=user.id,
|
|
||||||
additional_properties={
|
send_telemetry(
|
||||||
"task_name": running_task.executable.__name__,
|
f"{task_type} Task Completed",
|
||||||
},
|
user_id=user.id,
|
||||||
)
|
additional_properties={
|
||||||
raise error
|
"task_name": task_name,
|
||||||
|
"retry_count": retry_count,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return # Success, exit retry loop
|
||||||
|
|
||||||
|
except CogneeTransientError as error:
|
||||||
|
retry_count += 1
|
||||||
|
if retry_count <= max_retries:
|
||||||
|
logger.warning(
|
||||||
|
f"Transient error in {task_type} task `{task_name}`, retrying ({retry_count}/{max_retries}): {error}",
|
||||||
|
extra={
|
||||||
|
"task_type": task_type,
|
||||||
|
"task_name": task_name,
|
||||||
|
"user_id": user.id,
|
||||||
|
"retry_count": retry_count,
|
||||||
|
"error_type": error.__class__.__name__,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
# Exponential backoff for retries
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
await asyncio.sleep(2**retry_count)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# Max retries exceeded, raise enhanced error
|
||||||
|
raise PipelineExecutionError(
|
||||||
|
pipeline_name=f"{task_type}_pipeline",
|
||||||
|
task_name=task_name,
|
||||||
|
error_details=f"Max retries ({max_retries}) exceeded for transient error: {error}",
|
||||||
|
)
|
||||||
|
|
||||||
|
except (CogneeUserError, CogneeSystemError) as error:
|
||||||
|
# These errors shouldn't be retried, re-raise as pipeline execution error
|
||||||
|
logger.error(
|
||||||
|
f"{task_type} task failed: `{task_name}` - {error.__class__.__name__}: {error}",
|
||||||
|
extra={
|
||||||
|
"task_type": task_type,
|
||||||
|
"task_name": task_name,
|
||||||
|
"user_id": user.id,
|
||||||
|
"error_type": error.__class__.__name__,
|
||||||
|
"error_context": getattr(error, "context", {}),
|
||||||
|
},
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
send_telemetry(
|
||||||
|
f"{task_type} Task Errored",
|
||||||
|
user_id=user.id,
|
||||||
|
additional_properties={
|
||||||
|
"task_name": task_name,
|
||||||
|
"error_type": error.__class__.__name__,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Wrap in pipeline execution error with additional context
|
||||||
|
raise PipelineExecutionError(
|
||||||
|
pipeline_name=f"{task_type}_pipeline",
|
||||||
|
task_name=task_name,
|
||||||
|
error_details=f"{error.__class__.__name__}: {error}",
|
||||||
|
context={
|
||||||
|
"original_error": error.__class__.__name__,
|
||||||
|
"original_context": getattr(error, "context", {}),
|
||||||
|
"user_id": user.id,
|
||||||
|
"task_args": str(args)[:200], # Truncate for logging
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as error:
|
||||||
|
# Unexpected error, wrap in enhanced exception
|
||||||
|
logger.error(
|
||||||
|
f"{task_type} task encountered unexpected error: `{task_name}` - {error}",
|
||||||
|
extra={
|
||||||
|
"task_type": task_type,
|
||||||
|
"task_name": task_name,
|
||||||
|
"user_id": user.id,
|
||||||
|
"error_type": error.__class__.__name__,
|
||||||
|
},
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
send_telemetry(
|
||||||
|
f"{task_type} Task Errored",
|
||||||
|
user_id=user.id,
|
||||||
|
additional_properties={
|
||||||
|
"task_name": task_name,
|
||||||
|
"error_type": error.__class__.__name__,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check if this might be a known error type we can categorize
|
||||||
|
error_message = str(error).lower()
|
||||||
|
if any(term in error_message for term in ["connection", "timeout", "network"]):
|
||||||
|
if (
|
||||||
|
"llm" in error_message
|
||||||
|
or "openai" in error_message
|
||||||
|
or "anthropic" in error_message
|
||||||
|
):
|
||||||
|
raise LLMConnectionError(provider="Unknown", model="Unknown", reason=str(error))
|
||||||
|
elif "database" in error_message or "sql" in error_message:
|
||||||
|
raise DatabaseConnectionError(db_type="Unknown", reason=str(error))
|
||||||
|
|
||||||
|
# Default to pipeline execution error
|
||||||
|
raise PipelineExecutionError(
|
||||||
|
pipeline_name=f"{task_type}_pipeline",
|
||||||
|
task_name=task_name,
|
||||||
|
error_details=f"Unexpected error: {error}",
|
||||||
|
context={
|
||||||
|
"error_type": error.__class__.__name__,
|
||||||
|
"user_id": user.id,
|
||||||
|
"task_args": str(args)[:200], # Truncate for logging
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def run_tasks_base(tasks: list[Task], data=None, user: User = None, context: dict = None):
|
async def run_tasks_base(tasks: list[Task], data=None, user: User = None, context: dict = None):
|
||||||
"""Base function to execute tasks in a pipeline, handling task type detection and execution."""
|
"""
|
||||||
|
Base function to execute tasks in a pipeline with enhanced error handling.
|
||||||
|
|
||||||
|
Provides comprehensive error handling, logging, and recovery strategies for pipeline execution.
|
||||||
|
"""
|
||||||
if len(tasks) == 0:
|
if len(tasks) == 0:
|
||||||
yield data
|
yield data
|
||||||
return
|
return
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue