diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py
index f14861d6d..4f51729a3 100644
--- a/cognee/api/v1/add/add.py
+++ b/cognee/api/v1/add/add.py
@@ -14,7 +14,7 @@ async def add(
     node_set: Optional[List[str]] = None,
     vector_db_config: dict = None,
     graph_db_config: dict = None,
-    dataset_id: UUID = None,
+    dataset_id: Optional[UUID] = None,
 ):
     """
     Add data to Cognee for knowledge graph processing.
diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py
index e18a5f322..5162f3b7f 100644
--- a/cognee/api/v1/add/routers/get_add_router.py
+++ b/cognee/api/v1/add/routers/get_add_router.py
@@ -1,10 +1,10 @@
 import os
 from uuid import UUID
-from fastapi import Form, UploadFile, Depends
+from fastapi import Form, File, UploadFile, Depends
 from fastapi.responses import JSONResponse
 from fastapi import APIRouter
-from typing import List, Optional
+from typing import List, Optional, Union, Literal
 import subprocess
 from cognee.shared.logging_utils import get_logger
 import requests
 
@@ -20,9 +20,9 @@ def get_add_router() -> APIRouter:
 
     @router.post("", response_model=dict)
     async def add(
-        data: List[UploadFile],
+        data: List[UploadFile] = File(default=None),
         datasetName: Optional[str] = Form(default=None),
-        datasetId: Optional[UUID] = Form(default=None),
+        datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
         user: User = Depends(get_authenticated_user),
     ):
         """
@@ -38,7 +38,7 @@ def get_add_router() -> APIRouter:
         - GitHub repository URLs (will be cloned and processed)
         - Regular file uploads
         - **datasetName** (Optional[str]): Name of the dataset to add data to
-        - **datasetId** (Optional[UUID]): UUID of the dataset to add data to
+        - **datasetId** (Optional[UUID]): UUID of an already existing dataset
 
         Either datasetName or datasetId must be provided.
 
@@ -58,6 +58,7 @@ def get_add_router() -> APIRouter:
         - GitHub repositories are cloned and all files are processed
         - HTTP URLs are fetched and their content is processed
         - The ALLOW_HTTP_REQUESTS environment variable controls URL processing
+        - datasetId value can only be the UUID of an already existing dataset
         """
         from cognee.api.v1.add import add as cognee_add
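Note on the `datasetId` form change above: Swagger UI submits optional form fields it leaves blank as the empty string, which a plain `Optional[UUID]` rejects with a 422, so widening the type to `Union[UUID, Literal[""], None]` appears to be what lets the generated docs page call the endpoint without a dataset UUID. A minimal client sketch of the updated endpoint; the base URL, route prefix, and bearer token are placeholder assumptions, not taken from this diff:

```python
# Hypothetical client call against the updated add endpoint. BASE_URL, the
# route prefix, and the token are placeholders, not part of the diff.
import requests

BASE_URL = "http://localhost:8000"  # assumed local Cognee API instance

with open("notes.txt", "rb") as file:
    response = requests.post(
        f"{BASE_URL}/api/v1/add",  # assumed mount point for get_add_router()
        files=[("data", ("notes.txt", file, "text/plain"))],
        # Swagger UI sends "" for blank optional form fields; the widened
        # Union[UUID, Literal[""], None] type now tolerates that.
        data={"datasetName": "my_dataset", "datasetId": ""},
        headers={"Authorization": "Bearer <token>"},  # placeholder credential
    )

print(response.status_code, response.json())
```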
diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py
index 59162382d..53ce5284a 100644
--- a/cognee/api/v1/cognify/routers/get_cognify_router.py
+++ b/cognee/api/v1/cognify/routers/get_cognify_router.py
@@ -1,7 +1,7 @@
 import os
 import asyncio
 from uuid import UUID
-from pydantic import BaseModel
+from pydantic import Field
 from typing import List, Optional
 from fastapi.responses import JSONResponse
 from fastapi import APIRouter, WebSocket, Depends, WebSocketDisconnect
@@ -10,7 +10,6 @@ from starlette.status import WS_1000_NORMAL_CLOSURE, WS_1008_POLICY_VIOLATION
 from cognee.api.DTO import InDTO
 from cognee.modules.pipelines.methods import get_pipeline_run
 from cognee.modules.users.models import User
-from cognee.shared.data_models import KnowledgeGraph
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.modules.users.get_user_db import get_user_db_context
 from cognee.modules.graph.methods import get_formatted_graph_data
@@ -30,9 +29,9 @@ logger = get_logger("api.cognify")
 
 
 class CognifyPayloadDTO(InDTO):
-    datasets: Optional[List[str]] = None
-    dataset_ids: Optional[List[UUID]] = None
-    run_in_background: Optional[bool] = False
+    datasets: Optional[List[str]] = Field(default=None)
+    dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
+    run_in_background: Optional[bool] = Field(default=False)
 
 
 def get_cognify_router() -> APIRouter:
@@ -57,8 +56,7 @@
 
         ## Request Parameters
         - **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
-        - **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
-        - **graph_model** (Optional[BaseModel]): Custom Pydantic model defining the knowledge graph schema. Defaults to KnowledgeGraph for general-purpose processing.
+        - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
         - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
 
         ## Response
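The move from bare `= None` annotations to `Field(...)` in `CognifyPayloadDTO` is documentation-only: `examples=[[]]` changes what Swagger UI pre-fills in the generated OpenAPI schema, while the defaults validate exactly as before. A standalone sketch of the same pattern, using a plain `BaseModel` stand-in since the `InDTO` base class is not shown in this diff:

```python
# Minimal sketch of the Field(default=..., examples=...) pattern used in
# CognifyPayloadDTO; PayloadSketch is an illustrative stand-in, not the real DTO.
from typing import List, Optional
from uuid import UUID

from pydantic import BaseModel, Field

class PayloadSketch(BaseModel):
    datasets: Optional[List[str]] = Field(default=None)
    dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
    run_in_background: Optional[bool] = Field(default=False)

# Defaults behave exactly as the bare "= None" annotations did:
print(PayloadSketch().model_dump())
# examples=[[]] only adds metadata to the generated JSON schema:
print(PayloadSketch.model_json_schema()["properties"]["dataset_ids"]["examples"])
```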
diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py
index e63016187..6b21b5713 100644
--- a/cognee/api/v1/search/routers/get_search_router.py
+++ b/cognee/api/v1/search/routers/get_search_router.py
@@ -1,6 +1,7 @@
 from uuid import UUID
 from typing import Optional
 from datetime import datetime
+from pydantic import Field
 from fastapi import Depends, APIRouter
 from fastapi.responses import JSONResponse
 from cognee.modules.search.types import SearchType
@@ -14,11 +15,11 @@ from cognee.modules.users.methods import get_authenticated_user
 # Note: Datasets sent by name will only map to datasets owned by the request sender
 # To search for datasets not owned by the request sender dataset UUID is needed
 class SearchPayloadDTO(InDTO):
-    search_type: SearchType
-    datasets: Optional[list[str]] = None
-    dataset_ids: Optional[list[UUID]] = None
-    query: str
-    top_k: Optional[int] = 10
+    search_type: SearchType = Field(default=SearchType.GRAPH_COMPLETION)
+    datasets: Optional[list[str]] = Field(default=None)
+    dataset_ids: Optional[list[UUID]] = Field(default=None, examples=[[]])
+    query: str = Field(default="What is in the document?")
+    top_k: Optional[int] = Field(default=10)
 
 
 def get_search_router() -> APIRouter:
diff --git a/cognee/modules/ingestion/classify.py b/cognee/modules/ingestion/classify.py
index b2fceaae3..5fc8ed83c 100644
--- a/cognee/modules/ingestion/classify.py
+++ b/cognee/modules/ingestion/classify.py
@@ -12,9 +12,7 @@ def classify(data: Union[str, BinaryIO], filename: str = None):
         return TextData(data)
 
     if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile):
-        return BinaryData(
-            data, str(data.name).split("/")[-1] if hasattr(data, "name") else filename
-        )
+        return BinaryData(data, filename if filename else str(data.name).split("/")[-1])
 
     try:
         from s3fs import S3File
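The `classify` change is a precedence fix: the old expression ignored an explicitly passed `filename` whenever the file object carried a `.name` attribute. A small self-contained illustration of just the argument-selection logic; `pick_filename` is a hypothetical helper mirroring the new expression, and `BinaryData` itself is not reproduced here:

```python
# Demonstrates the filename-precedence fix in classify(); pick_filename is a
# hypothetical helper mirroring only the new argument-selection expression.
def pick_filename(data, filename=None):
    # New behavior: an explicit filename wins; otherwise fall back to the
    # basename of data.name. The old code checked data.name first, so a
    # caller-supplied filename was silently ignored for on-disk files.
    return filename if filename else str(data.name).split("/")[-1]

with open("/tmp/example.bin", "wb") as f:
    f.write(b"\x00")

with open("/tmp/example.bin", "rb") as fh:  # BufferedReader with a .name attribute
    assert pick_filename(fh) == "example.bin"
    assert pick_filename(fh, "override.bin") == "override.bin"  # old logic returned "example.bin"
```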