refactor: simplify endpoint default values (#1123)

<!-- .github/pull_request_template.md -->

## Description
Simplify Cognee endpoints so that the default dataset ID is None.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Igor Ilic 2025-07-22 15:09:44 +02:00 committed by GitHub
parent 115585ee9c
commit 022c96de55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 19 additions and 21 deletions

View file

@@ -14,7 +14,7 @@ async def add(
node_set: Optional[List[str]] = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
dataset_id: UUID = None,
dataset_id: Optional[UUID] = None,
):
"""
Add data to Cognee for knowledge graph processing.

View file

@@ -1,10 +1,10 @@
import os
from uuid import UUID
from fastapi import Form, UploadFile, Depends
from fastapi import Form, File, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
from typing import List, Optional
from typing import List, Optional, Union, Literal
import subprocess
from cognee.shared.logging_utils import get_logger
import requests
@@ -20,9 +20,9 @@ def get_add_router() -> APIRouter:
@router.post("", response_model=dict)
async def add(
data: List[UploadFile],
data: List[UploadFile] = File(default=None),
datasetName: Optional[str] = Form(default=None),
datasetId: Optional[UUID] = Form(default=None),
datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
user: User = Depends(get_authenticated_user),
):
"""
@@ -38,7 +38,7 @@ def get_add_router() -> APIRouter:
- GitHub repository URLs (will be cloned and processed)
- Regular file uploads
- **datasetName** (Optional[str]): Name of the dataset to add data to
- **datasetId** (Optional[UUID]): UUID of the dataset to add data to
- **datasetId** (Optional[UUID]): UUID of an already existing dataset
Either datasetName or datasetId must be provided.
@@ -58,6 +58,7 @@ def get_add_router() -> APIRouter:
- GitHub repositories are cloned and all files are processed
- HTTP URLs are fetched and their content is processed
- The ALLOW_HTTP_REQUESTS environment variable controls URL processing
- datasetId value can only be the UUID of an already existing dataset
"""
from cognee.api.v1.add import add as cognee_add

View file

@@ -1,7 +1,7 @@
import os
import asyncio
from uuid import UUID
from pydantic import BaseModel
from pydantic import Field
from typing import List, Optional
from fastapi.responses import JSONResponse
from fastapi import APIRouter, WebSocket, Depends, WebSocketDisconnect
@@ -10,7 +10,6 @@ from starlette.status import WS_1000_NORMAL_CLOSURE, WS_1008_POLICY_VIOLATION
from cognee.api.DTO import InDTO
from cognee.modules.pipelines.methods import get_pipeline_run
from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.users.get_user_db import get_user_db_context
from cognee.modules.graph.methods import get_formatted_graph_data
@@ -30,9 +29,9 @@ logger = get_logger("api.cognify")
class CognifyPayloadDTO(InDTO):
datasets: Optional[List[str]] = None
dataset_ids: Optional[List[UUID]] = None
run_in_background: Optional[bool] = False
datasets: Optional[List[str]] = Field(default=None)
dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
run_in_background: Optional[bool] = Field(default=False)
def get_cognify_router() -> APIRouter:
@@ -57,8 +56,7 @@ def get_cognify_router() -> APIRouter:
## Request Parameters
- **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
- **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
- **graph_model** (Optional[BaseModel]): Custom Pydantic model defining the knowledge graph schema. Defaults to KnowledgeGraph for general-purpose processing.
- **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
- **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
## Response

View file

@@ -1,6 +1,7 @@
from uuid import UUID
from typing import Optional
from datetime import datetime
from pydantic import Field
from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse
from cognee.modules.search.types import SearchType
@@ -14,11 +15,11 @@ from cognee.modules.users.methods import get_authenticated_user
# Note: Datasets sent by name will only map to datasets owned by the request sender
# To search for datasets not owned by the request sender dataset UUID is needed
class SearchPayloadDTO(InDTO):
search_type: SearchType
datasets: Optional[list[str]] = None
dataset_ids: Optional[list[UUID]] = None
query: str
top_k: Optional[int] = 10
search_type: SearchType = Field(default=SearchType.GRAPH_COMPLETION)
datasets: Optional[list[str]] = Field(default=None)
dataset_ids: Optional[list[UUID]] = Field(default=None, examples=[[]])
query: str = Field(default="What is in the document?")
top_k: Optional[int] = Field(default=10)
def get_search_router() -> APIRouter:

View file

@@ -12,9 +12,7 @@ def classify(data: Union[str, BinaryIO], filename: str = None):
return TextData(data)
if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile):
return BinaryData(
data, str(data.name).split("/")[-1] if hasattr(data, "name") else filename
)
return BinaryData(data, filename if filename else str(data.name).split("/")[-1])
try:
from s3fs import S3File