From f44bb5dabfca466b8e733c1e5b03153f449dc071 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 26 Sep 2025 12:44:05 +0200 Subject: [PATCH 1/3] feat: Add update endpoint to Cognee --- cognee/api/client.py | 3 + cognee/api/v1/update/__init__.py | 1 + cognee/api/v1/update/routers/__init__.py | 1 + .../v1/update/routers/get_update_router.py | 61 +++++++++++++++++++ cognee/api/v1/update/update.py | 46 ++++++++++++++ 5 files changed, 112 insertions(+) create mode 100644 cognee/api/v1/update/__init__.py create mode 100644 cognee/api/v1/update/routers/__init__.py create mode 100644 cognee/api/v1/update/routers/get_update_router.py create mode 100644 cognee/api/v1/update/update.py diff --git a/cognee/api/client.py b/cognee/api/client.py index 04c81841f..6766c12de 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -28,6 +28,7 @@ from cognee.api.v1.add.routers import get_add_router from cognee.api.v1.delete.routers import get_delete_router from cognee.api.v1.responses.routers import get_responses_router from cognee.api.v1.sync.routers import get_sync_router +from cognee.api.v1.update.routers import get_update_router from cognee.api.v1.users.routers import ( get_auth_router, get_register_router, @@ -263,6 +264,8 @@ app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["vi app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"]) +app.include_router(get_update_router(), prefix="/api/v1/update", tags=["update"]) + app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"]) app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"]) diff --git a/cognee/api/v1/update/__init__.py b/cognee/api/v1/update/__init__.py new file mode 100644 index 000000000..3840db468 --- /dev/null +++ b/cognee/api/v1/update/__init__.py @@ -0,0 +1 @@ +from .update import update diff --git a/cognee/api/v1/update/routers/__init__.py b/cognee/api/v1/update/routers/__init__.py new file mode 100644 
index 000000000..dbfa55603 --- /dev/null +++ b/cognee/api/v1/update/routers/__init__.py @@ -0,0 +1 @@ +from .get_update_router import get_update_router diff --git a/cognee/api/v1/update/routers/get_update_router.py b/cognee/api/v1/update/routers/get_update_router.py new file mode 100644 index 000000000..16acdf29c --- /dev/null +++ b/cognee/api/v1/update/routers/get_update_router.py @@ -0,0 +1,61 @@ +from fastapi.responses import JSONResponse +from fastapi import File, UploadFile, Depends, Form +from typing import Optional +from fastapi import APIRouter +from fastapi.encoders import jsonable_encoder +from typing import List +from uuid import UUID +from cognee.shared.logging_utils import get_logger +from cognee.modules.users.models import User +from cognee.modules.users.methods import get_authenticated_user +from cognee.shared.utils import send_telemetry +from cognee.modules.pipelines.models.PipelineRunInfo import ( + PipelineRunErrored, +) + +logger = get_logger() + + +def get_update_router() -> APIRouter: + router = APIRouter() + + @router.patch("", response_model=None) + async def update( + data_id: UUID, + dataset_id: UUID, + data: List[UploadFile] = File(default=None), + node_set: Optional[List[str]] = Form(default=[""], example=[""]), + user: User = Depends(get_authenticated_user), + ): + send_telemetry( + "Update API Endpoint Invoked", + user.id, + additional_properties={ + "endpoint": "PATCH /v1/update", + "dataset_id": str(dataset_id), + "data_id": str(data_id), + "node_set": str(node_set), + }, + ) + + from cognee.api.v1.update import update as cognee_update + + try: + update_run = await cognee_update( + data_id=data_id, + data=data, + dataset_id=dataset_id, + user=user, + node_set=node_set, + ) + + # If any cognify run errored return JSONResponse with proper error status code + if any(isinstance(v, PipelineRunErrored) for v in update_run.values()): + return JSONResponse(status_code=420, content=jsonable_encoder(update_run)) + return update_run + + except 
Exception as error: + logger.error(f"Error during deletion by data_id: {str(error)}") + return JSONResponse(status_code=409, content={"error": str(error)}) + + return router diff --git a/cognee/api/v1/update/update.py b/cognee/api/v1/update/update.py new file mode 100644 index 000000000..4de4333af --- /dev/null +++ b/cognee/api/v1/update/update.py @@ -0,0 +1,46 @@ +from uuid import UUID +from typing import Union, BinaryIO, List, Optional + +from cognee.modules.users.models import User +from cognee.api.v1.delete import delete +from cognee.api.v1.add import add +from cognee.api.v1.cognify import cognify + + +async def update( + data_id: UUID, + data: Union[BinaryIO, list[BinaryIO], str, list[str]], + user: User = None, + node_set: Optional[List[str]] = None, + dataset_id: Optional[UUID] = None, + vector_db_config: dict = None, + graph_db_config: dict = None, + preferred_loaders: List[str] = None, + incremental_loading: bool = True, +): + await delete( + data_id=data_id, + dataset_id=dataset_id, + user=user, + ) + + await add( + data=data, + dataset_id=dataset_id, + user=user, + node_set=node_set, + vector_db_config=vector_db_config, + graph_db_config=graph_db_config, + preferred_loaders=preferred_loaders, + incremental_loading=incremental_loading, + ) + + cognify_run = await cognify( + datasets=[dataset_id], + user=user, + vector_db_config=vector_db_config, + graph_db_config=graph_db_config, + incremental_loading=incremental_loading, + ) + + return cognify_run From 4c8e3b8bb36e16bdbbaa95f831e6bca70a481fa2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 29 Sep 2025 14:40:41 +0200 Subject: [PATCH 2/3] refactor: Add docstring to update function --- cognee/api/v1/update/update.py | 54 ++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/cognee/api/v1/update/update.py b/cognee/api/v1/update/update.py index 4de4333af..b4b1f5e5a 100644 --- a/cognee/api/v1/update/update.py +++ b/cognee/api/v1/update/update.py @@ -18,6 +18,60 @@ async def 
update( preferred_loaders: List[str] = None, incremental_loading: bool = True, ): + """ + Update existing data in Cognee. + + Supported Input Types: + - **Text strings**: Direct text content (str) - any string not starting with "/" or "file://" + - **File paths**: Local file paths as strings in these formats: + * Absolute paths: "/path/to/document.pdf" + * File URLs: "file:///path/to/document.pdf" or "file://relative/path.txt" + * S3 paths: "s3://bucket-name/path/to/file.pdf" + - **Binary file objects**: File handles/streams (BinaryIO) + - **Lists**: Multiple files or text strings in a single call + + Supported File Formats: + - Text files (.txt, .md, .csv) + - PDFs (.pdf) + - Images (.png, .jpg, .jpeg) - extracted via OCR/vision models + - Audio files (.mp3, .wav) - transcribed to text + - Code files (.py, .js, .ts, etc.) - parsed for structure and content + - Office documents (.docx, .pptx) + + Workflow: + 1. **Data Resolution**: Resolves file paths and validates accessibility + 2. **Content Extraction**: Extracts text content from various file formats + 3. **Dataset Storage**: Stores processed content in the specified dataset + 4. **Metadata Tracking**: Records file metadata, timestamps, and user permissions + 5. **Permission Assignment**: Grants user read/write/delete/share permissions on dataset + + Args: + data_id: UUID of existing data to update + data: The latest version of the data. Can be: + - Single text string: "Your text content here" + - Absolute file path: "/path/to/document.pdf" + - File URL: "file:///absolute/path/to/document.pdf" or "file://relative/path.txt" + - S3 path: "s3://my-bucket/documents/file.pdf" + - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle] + - Binary file object: open("file.txt", "rb") + dataset_id: UUID of the dataset to update: the existing data is deleted from it and the new data is added to it. + preferred_loaders: Optional list of preferred loaders, passed through to add. + user: User object for authentication and permissions. 
Uses default user if None. + Default user: "default_user@example.com" (created automatically on first use). + Users can only access datasets they have permissions for. + node_set: Optional list of node identifiers for graph organization and access control. + Used for grouping related data points in the knowledge graph. + vector_db_config: Optional configuration for vector database (for custom setups). + graph_db_config: Optional configuration for graph database (for custom setups). + incremental_loading: Passed through to add and cognify. Defaults to True. + + Returns: + PipelineRunInfo: Information about the ingestion pipeline execution including: + - Pipeline run ID for tracking + - Dataset ID where data was stored + - Processing status and any errors + - Execution timestamps and metadata + """ await delete( data_id=data_id, dataset_id=dataset_id, user=user, ) From e333a860ba4743835bd98dea351126ae802349fe Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 29 Sep 2025 20:42:25 +0200 Subject: [PATCH 3/3] refactor: Add documentation for update endpoint --- .../v1/update/routers/get_update_router.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/cognee/api/v1/update/routers/get_update_router.py b/cognee/api/v1/update/routers/get_update_router.py index 16acdf29c..144bdd9e6 100644 --- a/cognee/api/v1/update/routers/get_update_router.py +++ b/cognee/api/v1/update/routers/get_update_router.py @@ -27,6 +27,35 @@ def get_update_router() -> APIRouter: node_set: Optional[List[str]] = Form(default=[""], example=[""]), user: User = Depends(get_authenticated_user), ): + """ + Update data in a dataset. + + This endpoint updates existing documents in a specified dataset by providing the data_id of the existing document + to update and the new document with the changes as the data. + The document is updated, analyzed, and the changes are integrated into the knowledge graph. 
+ + ## Request Parameters + - **data_id** (UUID): UUID of the document to update in Cognee memory + - **data** (List[UploadFile]): List of files to upload. + - **dataset_id** (UUID): UUID of an already existing dataset + - **node_set** (Optional[List[str]]): List of node identifiers for graph organization and access control. + Used for grouping related data points in the knowledge graph. + + ## Response + Returns information about the update operation containing: + - Status of the operation + - Details about the processed data + - Any relevant metadata from the ingestion process + + ## Error Codes + - **420**: A pipeline run errored while processing the updated data + - **409 Conflict**: Error during update operation + - **403 Forbidden**: User doesn't have permission to update data in the dataset + + ## Notes + - To update data in datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True) + - dataset_id value can only be the UUID of an already existing dataset + """ send_telemetry( "Update API Endpoint Invoked", user.id,