feat: Add update endpoint to Cognee [COG-3016] (#1475)
<!-- .github/pull_request_template.md -->
## Description
Add update method and endpoint for Cognee
## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [x] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):
## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the
issue/feature**
- [x] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages
## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
<!-- CURSOR_SUMMARY -->
---
> [!NOTE]
> <sup>[Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) is
generating a summary for commit
9524109029. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
This commit is contained in:
commit
92d183aedb
5 changed files with 195 additions and 0 deletions
|
|
@ -28,6 +28,7 @@ from cognee.api.v1.add.routers import get_add_router
|
||||||
from cognee.api.v1.delete.routers import get_delete_router
|
from cognee.api.v1.delete.routers import get_delete_router
|
||||||
from cognee.api.v1.responses.routers import get_responses_router
|
from cognee.api.v1.responses.routers import get_responses_router
|
||||||
from cognee.api.v1.sync.routers import get_sync_router
|
from cognee.api.v1.sync.routers import get_sync_router
|
||||||
|
from cognee.api.v1.update.routers import get_update_router
|
||||||
from cognee.api.v1.users.routers import (
|
from cognee.api.v1.users.routers import (
|
||||||
get_auth_router,
|
get_auth_router,
|
||||||
get_register_router,
|
get_register_router,
|
||||||
|
|
@ -263,6 +264,8 @@ app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["vi
|
||||||
|
|
||||||
app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"])
|
app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"])
|
||||||
|
|
||||||
|
app.include_router(get_update_router(), prefix="/api/v1/update", tags=["update"])
|
||||||
|
|
||||||
app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])
|
app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])
|
||||||
|
|
||||||
app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
|
app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
|
||||||
|
|
|
||||||
1
cognee/api/v1/update/__init__.py
Normal file
1
cognee/api/v1/update/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
from .update import update
|
||||||
1
cognee/api/v1/update/routers/__init__.py
Normal file
1
cognee/api/v1/update/routers/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
from .get_update_router import get_update_router
|
||||||
90
cognee/api/v1/update/routers/get_update_router.py
Normal file
90
cognee/api/v1/update/routers/get_update_router.py
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
from fastapi import File, UploadFile, Depends, Form
|
||||||
|
from typing import Optional
|
||||||
|
from fastapi import APIRouter
|
||||||
|
from fastapi.encoders import jsonable_encoder
|
||||||
|
from typing import List
|
||||||
|
from uuid import UUID
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
from cognee.modules.users.models import User
|
||||||
|
from cognee.modules.users.methods import get_authenticated_user
|
||||||
|
from cognee.shared.utils import send_telemetry
|
||||||
|
from cognee.modules.pipelines.models.PipelineRunInfo import (
|
||||||
|
PipelineRunErrored,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def get_update_router() -> APIRouter:
    """Build and return the APIRouter exposing the PATCH /v1/update endpoint."""
    router = APIRouter()

    @router.patch("", response_model=None)
    async def update(
        data_id: UUID,
        dataset_id: UUID,
        data: List[UploadFile] = File(default=None),
        node_set: Optional[List[str]] = Form(default=[""], example=[""]),
        user: User = Depends(get_authenticated_user),
    ):
        """
        Update data in a dataset.

        Replaces the existing document identified by ``data_id`` with the
        uploaded ``data``; the new document is analyzed and its changes are
        integrated into the knowledge graph.

        ## Request Parameters
        - **data_id** (UUID): UUID of the document to update in Cognee memory.
        - **dataset_id** (UUID): UUID of the dataset containing the document.
        - **data** (List[UploadFile]): New version(s) of the document to upload.
        - **node_set** (Optional[List[str]]): Node identifiers for graph
          organization and access control; used for grouping related data
          points in the knowledge graph.

        ## Response
        Returns the pipeline-run information for the update operation.

        ## Error Codes
        - **420**: One or more pipeline runs errored (payload contains the
          run details).
        - **409 Conflict**: Unexpected error during the update operation.
        """
        send_telemetry(
            "Update API Endpoint Invoked",
            user.id,
            additional_properties={
                "endpoint": "PATCH /v1/update",
                "dataset_id": str(dataset_id),
                "data_id": str(data_id),
                "node_set": str(node_set),
            },
        )

        # NOTE(review): imported inside the handler rather than at module
        # level — presumably to avoid a circular import; confirm.
        from cognee.api.v1.update import update as cognee_update

        try:
            update_run = await cognee_update(
                data_id=data_id,
                data=data,
                dataset_id=dataset_id,
                user=user,
                node_set=node_set,
            )

            # If any pipeline run errored, surface it via a non-2xx status
            # code instead of a plain 200 with an error payload.
            if any(isinstance(v, PipelineRunErrored) for v in update_run.values()):
                return JSONResponse(status_code=420, content=jsonable_encoder(update_run))
            return update_run

        except Exception as error:
            # Fixed message: previously logged "Error during deletion by
            # data_id", copy-pasted from the delete endpoint.
            logger.error(f"Error during update by data_id: {str(error)}")
            return JSONResponse(status_code=409, content={"error": str(error)})

    return router
|
||||||
100
cognee/api/v1/update/update.py
Normal file
100
cognee/api/v1/update/update.py
Normal file
|
|
@ -0,0 +1,100 @@
|
||||||
|
from uuid import UUID
|
||||||
|
from typing import Union, BinaryIO, List, Optional
|
||||||
|
|
||||||
|
from cognee.modules.users.models import User
|
||||||
|
from cognee.api.v1.delete import delete
|
||||||
|
from cognee.api.v1.add import add
|
||||||
|
from cognee.api.v1.cognify import cognify
|
||||||
|
|
||||||
|
|
||||||
|
async def update(
    data_id: UUID,
    data: Union[BinaryIO, list[BinaryIO], str, list[str]],
    user: User = None,
    node_set: Optional[List[str]] = None,
    dataset_id: Optional[UUID] = None,
    vector_db_config: Optional[dict] = None,
    graph_db_config: Optional[dict] = None,
    preferred_loaders: Optional[List[str]] = None,
    incremental_loading: bool = True,
):
    """
    Update existing data in Cognee.

    Replaces the document identified by *data_id* with the new *data* and
    re-runs knowledge-graph processing for the dataset.

    Supported Input Types:
        - **Text strings**: Direct text content (str) - any string not starting with "/" or "file://"
        - **File paths**: Local file paths as strings in these formats:
            * Absolute paths: "/path/to/document.pdf"
            * File URLs: "file:///path/to/document.pdf" or "file://relative/path.txt"
            * S3 paths: "s3://bucket-name/path/to/file.pdf"
        - **Binary file objects**: File handles/streams (BinaryIO)
        - **Lists**: Multiple files or text strings in a single call

    Supported File Formats:
        - Text files (.txt, .md, .csv)
        - PDFs (.pdf)
        - Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
        - Audio files (.mp3, .wav) - transcribed to text
        - Code files (.py, .js, .ts, etc.) - parsed for structure and content
        - Office documents (.docx, .pptx)

    Workflow:
        1. **Delete**: Removes the existing document identified by *data_id*
           from the dataset.
        2. **Add**: Ingests the new *data* into the dataset (content
           extraction, storage, metadata tracking, permissions).
        3. **Cognify**: Re-processes the dataset so the changes are
           integrated into the knowledge graph.

    NOTE(review): the delete/add/cognify sequence is not atomic — if the add
    or cognify step fails after the delete succeeds, the original document is
    already gone. Confirm whether compensating/rollback behavior is needed.

    Args:
        data_id: UUID of the existing data to update.
        data: The latest version of the data. Can be:
            - Single text string: "Your text content here"
            - Absolute file path: "/path/to/document.pdf"
            - File URL: "file:///absolute/path/to/document.pdf" or "file://relative/path.txt"
            - S3 path: "s3://my-bucket/documents/file.pdf"
            - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
            - Binary file object: open("file.txt", "rb")
        user: User object for authentication and permissions. Uses the default
            user if None. Users can only access datasets they have permissions
            for.
        node_set: Optional list of node identifiers for graph organization and
            access control. Used for grouping related data points in the
            knowledge graph.
        dataset_id: UUID of the dataset that contains the data to update.
        vector_db_config: Optional configuration for the vector database (for
            custom setups).
        graph_db_config: Optional configuration for the graph database (for
            custom setups).
        preferred_loaders: Optional list of loader names to prefer when
            ingesting the new data (forwarded to ``add``).
        incremental_loading: Whether to process the dataset incrementally
            (forwarded to ``add`` and ``cognify``). Defaults to True.

    Returns:
        PipelineRunInfo: Information about the cognify pipeline execution
        including:
            - Pipeline run ID for tracking
            - Dataset ID where data was stored
            - Processing status and any errors
            - Execution timestamps and metadata
    """
    # Step 1: remove the old version of the document.
    await delete(
        data_id=data_id,
        dataset_id=dataset_id,
        user=user,
    )

    # Step 2: ingest the new version into the same dataset.
    await add(
        data=data,
        dataset_id=dataset_id,
        user=user,
        node_set=node_set,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        preferred_loaders=preferred_loaders,
        incremental_loading=incremental_loading,
    )

    # Step 3: re-run graph processing so the update is reflected in the
    # knowledge graph.
    cognify_run = await cognify(
        datasets=[dataset_id],
        user=user,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        incremental_loading=incremental_loading,
    )

    return cognify_run
|
||||||
Loading…
Add table
Reference in a new issue