refactor: simplify endpoint default values (#1123)

<!-- .github/pull_request_template.md -->

## Description
Simplify Cognee endpoints so that the default dataset ID is None.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Igor Ilic 2025-07-22 15:09:44 +02:00 committed by GitHub
parent 115585ee9c
commit 022c96de55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 19 additions and 21 deletions

View file

@@ -14,7 +14,7 @@ async def add(
node_set: Optional[List[str]] = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
dataset_id: UUID = None,
dataset_id: Optional[UUID] = None,
):
"""
Add data to Cognee for knowledge graph processing.

View file

@@ -1,10 +1,10 @@
import os
from uuid import UUID
from fastapi import Form, UploadFile, Depends
from fastapi import Form, File, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
from typing import List, Optional
from typing import List, Optional, Union, Literal
import subprocess
from cognee.shared.logging_utils import get_logger
import requests
@@ -20,9 +20,9 @@ def get_add_router() -> APIRouter:
@router.post("", response_model=dict)
async def add(
data: List[UploadFile],
data: List[UploadFile] = File(default=None),
datasetName: Optional[str] = Form(default=None),
datasetId: Optional[UUID] = Form(default=None),
datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
user: User = Depends(get_authenticated_user),
):
"""
@@ -38,7 +38,7 @@ def get_add_router() -> APIRouter:
- GitHub repository URLs (will be cloned and processed)
- Regular file uploads
- **datasetName** (Optional[str]): Name of the dataset to add data to
- **datasetId** (Optional[UUID]): UUID of the dataset to add data to
- **datasetId** (Optional[UUID]): UUID of an already existing dataset
Either datasetName or datasetId must be provided.
@@ -58,6 +58,7 @@ def get_add_router() -> APIRouter:
- GitHub repositories are cloned and all files are processed
- HTTP URLs are fetched and their content is processed
- The ALLOW_HTTP_REQUESTS environment variable controls URL processing
- datasetId value can only be the UUID of an already existing dataset
"""
from cognee.api.v1.add import add as cognee_add

View file

@@ -1,7 +1,7 @@
import os
import asyncio
from uuid import UUID
from pydantic import BaseModel
from pydantic import Field
from typing import List, Optional
from fastapi.responses import JSONResponse
from fastapi import APIRouter, WebSocket, Depends, WebSocketDisconnect
@@ -10,7 +10,6 @@ from starlette.status import WS_1000_NORMAL_CLOSURE, WS_1008_POLICY_VIOLATION
from cognee.api.DTO import InDTO
from cognee.modules.pipelines.methods import get_pipeline_run
from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.users.get_user_db import get_user_db_context
from cognee.modules.graph.methods import get_formatted_graph_data
@@ -30,9 +29,9 @@ logger = get_logger("api.cognify")
class CognifyPayloadDTO(InDTO):
datasets: Optional[List[str]] = None
dataset_ids: Optional[List[UUID]] = None
run_in_background: Optional[bool] = False
datasets: Optional[List[str]] = Field(default=None)
dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
run_in_background: Optional[bool] = Field(default=False)
def get_cognify_router() -> APIRouter:
@@ -57,8 +56,7 @@ def get_cognify_router() -> APIRouter:
## Request Parameters
- **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
- **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
- **graph_model** (Optional[BaseModel]): Custom Pydantic model defining the knowledge graph schema. Defaults to KnowledgeGraph for general-purpose processing.
- **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
- **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
## Response

View file

@@ -1,6 +1,7 @@
from uuid import UUID
from typing import Optional
from datetime import datetime
from pydantic import Field
from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse
from cognee.modules.search.types import SearchType
@@ -14,11 +15,11 @@ from cognee.modules.users.methods import get_authenticated_user
# Note: Datasets sent by name will only map to datasets owned by the request sender
# To search for datasets not owned by the request sender dataset UUID is needed
class SearchPayloadDTO(InDTO):
search_type: SearchType
datasets: Optional[list[str]] = None
dataset_ids: Optional[list[UUID]] = None
query: str
top_k: Optional[int] = 10
search_type: SearchType = Field(default=SearchType.GRAPH_COMPLETION)
datasets: Optional[list[str]] = Field(default=None)
dataset_ids: Optional[list[UUID]] = Field(default=None, examples=[[]])
query: str = Field(default="What is in the document?")
top_k: Optional[int] = Field(default=10)
def get_search_router() -> APIRouter:

View file

@@ -12,9 +12,7 @@ def classify(data: Union[str, BinaryIO], filename: str = None):
return TextData(data)
if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile):
return BinaryData(
data, str(data.name).split("/")[-1] if hasattr(data, "name") else filename
)
return BinaryData(data, filename if filename else str(data.name).split("/")[-1])
try:
from s3fs import S3File