refactor: simplify endpoint default values (#1123)

<!-- .github/pull_request_template.md -->

## Description
Simplify the Cognee endpoints so that the default dataset ID is None

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Igor Ilic 2025-07-22 15:09:44 +02:00 committed by GitHub
parent 115585ee9c
commit 022c96de55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 19 additions and 21 deletions

View file

@@ -14,7 +14,7 @@ async def add(
node_set: Optional[List[str]] = None, node_set: Optional[List[str]] = None,
vector_db_config: dict = None, vector_db_config: dict = None,
graph_db_config: dict = None, graph_db_config: dict = None,
dataset_id: UUID = None, dataset_id: Optional[UUID] = None,
): ):
""" """
Add data to Cognee for knowledge graph processing. Add data to Cognee for knowledge graph processing.

View file

@@ -1,10 +1,10 @@
import os import os
from uuid import UUID from uuid import UUID
from fastapi import Form, UploadFile, Depends from fastapi import Form, File, UploadFile, Depends
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi import APIRouter from fastapi import APIRouter
from typing import List, Optional from typing import List, Optional, Union, Literal
import subprocess import subprocess
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
import requests import requests
@@ -20,9 +20,9 @@ def get_add_router() -> APIRouter:
@router.post("", response_model=dict) @router.post("", response_model=dict)
async def add( async def add(
data: List[UploadFile], data: List[UploadFile] = File(default=None),
datasetName: Optional[str] = Form(default=None), datasetName: Optional[str] = Form(default=None),
datasetId: Optional[UUID] = Form(default=None), datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
user: User = Depends(get_authenticated_user), user: User = Depends(get_authenticated_user),
): ):
""" """
@@ -38,7 +38,7 @@ def get_add_router() -> APIRouter:
- GitHub repository URLs (will be cloned and processed) - GitHub repository URLs (will be cloned and processed)
- Regular file uploads - Regular file uploads
- **datasetName** (Optional[str]): Name of the dataset to add data to - **datasetName** (Optional[str]): Name of the dataset to add data to
- **datasetId** (Optional[UUID]): UUID of the dataset to add data to - **datasetId** (Optional[UUID]): UUID of an already existing dataset
Either datasetName or datasetId must be provided. Either datasetName or datasetId must be provided.
@@ -58,6 +58,7 @@ def get_add_router() -> APIRouter:
- GitHub repositories are cloned and all files are processed - GitHub repositories are cloned and all files are processed
- HTTP URLs are fetched and their content is processed - HTTP URLs are fetched and their content is processed
- The ALLOW_HTTP_REQUESTS environment variable controls URL processing - The ALLOW_HTTP_REQUESTS environment variable controls URL processing
- datasetId value can only be the UUID of an already existing dataset
""" """
from cognee.api.v1.add import add as cognee_add from cognee.api.v1.add import add as cognee_add

View file

@@ -1,7 +1,7 @@
import os import os
import asyncio import asyncio
from uuid import UUID from uuid import UUID
from pydantic import BaseModel from pydantic import Field
from typing import List, Optional from typing import List, Optional
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi import APIRouter, WebSocket, Depends, WebSocketDisconnect from fastapi import APIRouter, WebSocket, Depends, WebSocketDisconnect
@@ -10,7 +10,6 @@ from starlette.status import WS_1000_NORMAL_CLOSURE, WS_1008_POLICY_VIOLATION
from cognee.api.DTO import InDTO from cognee.api.DTO import InDTO
from cognee.modules.pipelines.methods import get_pipeline_run from cognee.modules.pipelines.methods import get_pipeline_run
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.users.methods import get_authenticated_user from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.users.get_user_db import get_user_db_context from cognee.modules.users.get_user_db import get_user_db_context
from cognee.modules.graph.methods import get_formatted_graph_data from cognee.modules.graph.methods import get_formatted_graph_data
@@ -30,9 +29,9 @@ logger = get_logger("api.cognify")
class CognifyPayloadDTO(InDTO): class CognifyPayloadDTO(InDTO):
datasets: Optional[List[str]] = None datasets: Optional[List[str]] = Field(default=None)
dataset_ids: Optional[List[UUID]] = None dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
run_in_background: Optional[bool] = False run_in_background: Optional[bool] = Field(default=False)
def get_cognify_router() -> APIRouter: def get_cognify_router() -> APIRouter:
@@ -57,8 +56,7 @@ def get_cognify_router() -> APIRouter:
## Request Parameters ## Request Parameters
- **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user. - **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
- **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted). - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
- **graph_model** (Optional[BaseModel]): Custom Pydantic model defining the knowledge graph schema. Defaults to KnowledgeGraph for general-purpose processing.
- **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking). - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
## Response ## Response

View file

@@ -1,6 +1,7 @@
from uuid import UUID from uuid import UUID
from typing import Optional from typing import Optional
from datetime import datetime from datetime import datetime
from pydantic import Field
from fastapi import Depends, APIRouter from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from cognee.modules.search.types import SearchType from cognee.modules.search.types import SearchType
@@ -14,11 +15,11 @@ from cognee.modules.users.methods import get_authenticated_user
# Note: Datasets sent by name will only map to datasets owned by the request sender # Note: Datasets sent by name will only map to datasets owned by the request sender
# To search for datasets not owned by the request sender dataset UUID is needed # To search for datasets not owned by the request sender dataset UUID is needed
class SearchPayloadDTO(InDTO): class SearchPayloadDTO(InDTO):
search_type: SearchType search_type: SearchType = Field(default=SearchType.GRAPH_COMPLETION)
datasets: Optional[list[str]] = None datasets: Optional[list[str]] = Field(default=None)
dataset_ids: Optional[list[UUID]] = None dataset_ids: Optional[list[UUID]] = Field(default=None, examples=[[]])
query: str query: str = Field(default="What is in the document?")
top_k: Optional[int] = 10 top_k: Optional[int] = Field(default=10)
def get_search_router() -> APIRouter: def get_search_router() -> APIRouter:

View file

@@ -12,9 +12,7 @@ def classify(data: Union[str, BinaryIO], filename: str = None):
return TextData(data) return TextData(data)
if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile): if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile):
return BinaryData( return BinaryData(data, filename if filename else str(data.name).split("/")[-1])
data, str(data.name).split("/")[-1] if hasattr(data, "name") else filename
)
try: try:
from s3fs import S3File from s3fs import S3File