feat: Add update endpoint to Cognee [COG-3016] (#1475)

<!-- .github/pull_request_template.md -->

## Description
Add update method and endpoint for Cognee

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [x] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the
issue/feature**
- [x] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

<!-- CURSOR_SUMMARY -->
---

> [!NOTE]
> <sup>[Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) is
generating a summary for commit
9524109029. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
This commit is contained in:
Vasilije 2025-09-29 20:50:11 +02:00 committed by GitHub
commit 92d183aedb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 195 additions and 0 deletions

View file

@ -28,6 +28,7 @@ from cognee.api.v1.add.routers import get_add_router
from cognee.api.v1.delete.routers import get_delete_router
from cognee.api.v1.responses.routers import get_responses_router
from cognee.api.v1.sync.routers import get_sync_router
from cognee.api.v1.update.routers import get_update_router
from cognee.api.v1.users.routers import (
get_auth_router,
get_register_router,
@ -263,6 +264,8 @@ app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["vi
app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"])
app.include_router(get_update_router(), prefix="/api/v1/update", tags=["update"])
app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])
app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])

View file

@ -0,0 +1 @@
from .update import update

View file

@ -0,0 +1 @@
from .get_update_router import get_update_router

View file

@ -0,0 +1,90 @@
from fastapi.responses import JSONResponse
from fastapi import File, UploadFile, Depends, Form
from typing import Optional
from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder
from typing import List
from uuid import UUID
from cognee.shared.logging_utils import get_logger
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.shared.utils import send_telemetry
from cognee.modules.pipelines.models.PipelineRunInfo import (
PipelineRunErrored,
)
logger = get_logger()
def get_update_router() -> APIRouter:
    """Build and return the APIRouter exposing the ``PATCH /`` update endpoint."""
    router = APIRouter()

    @router.patch("", response_model=None)
    async def update(
        data_id: UUID,
        dataset_id: UUID,
        data: List[UploadFile] = File(default=None),
        node_set: Optional[List[str]] = Form(default=[""], example=[""]),
        user: User = Depends(get_authenticated_user),
    ):
        """
        Update data in a dataset.

        This endpoint updates an existing document in a specified dataset by providing
        the data_id of the existing document and the new version of the document as the
        uploaded data. The document is re-ingested, analyzed, and the changes are
        integrated into the knowledge graph.

        ## Request Parameters
        - **data_id** (UUID): UUID of the document to update in Cognee memory.
        - **dataset_id** (UUID): UUID of the already existing dataset containing the document.
        - **data** (List[UploadFile]): List of files holding the new document content.
        - **node_set** (Optional[List[str]]): List of node identifiers for graph organization
          and access control. Used for grouping related data points in the knowledge graph.

        ## Response
        Returns information about the update operation containing:
        - Status of the operation
        - Details about the processed data
        - Any relevant metadata from the ingestion process

        ## Error Codes
        - **409 Conflict**: Error during the update operation
        - **420**: One of the pipeline runs errored (body contains the run info)

        ## Notes
        - To update data in datasets not owned by the user, use dataset_id
          (when ENABLE_BACKEND_ACCESS_CONTROL is set to True).
        - dataset_id must be the UUID of an already existing dataset.
        """
        send_telemetry(
            "Update API Endpoint Invoked",
            user.id,
            additional_properties={
                "endpoint": "PATCH /v1/update",
                "dataset_id": str(dataset_id),
                "data_id": str(data_id),
                "node_set": str(node_set),
            },
        )

        # Imported lazily to avoid a circular import at module load time.
        from cognee.api.v1.update import update as cognee_update

        try:
            update_run = await cognee_update(
                data_id=data_id,
                data=data,
                dataset_id=dataset_id,
                user=user,
                node_set=node_set,
            )
            # If any pipeline run errored, surface it with an error status code
            # instead of a 200 (420 matches the convention used by sibling routers).
            if any(isinstance(v, PipelineRunErrored) for v in update_run.values()):
                return JSONResponse(status_code=420, content=jsonable_encoder(update_run))
            return update_run
        except Exception as error:
            # Fixed: previous message said "deletion" — copy-paste from the delete endpoint.
            logger.error(f"Error during update by data_id: {str(error)}")
            return JSONResponse(status_code=409, content={"error": str(error)})

    return router

View file

@ -0,0 +1,100 @@
from uuid import UUID
from typing import Union, BinaryIO, List, Optional
from cognee.modules.users.models import User
from cognee.api.v1.delete import delete
from cognee.api.v1.add import add
from cognee.api.v1.cognify import cognify
async def update(
    data_id: UUID,
    data: Union[BinaryIO, list[BinaryIO], str, list[str]],
    user: User = None,
    node_set: Optional[List[str]] = None,
    dataset_id: Optional[UUID] = None,
    vector_db_config: Optional[dict] = None,
    graph_db_config: Optional[dict] = None,
    preferred_loaders: Optional[List[str]] = None,
    incremental_loading: bool = True,
):
    """
    Update existing data in Cognee.

    Replaces the document identified by ``data_id`` with the new content in ``data``
    by deleting the old document, re-adding the new one, and re-running cognify on
    the dataset.

    Supported Input Types:
        - **Text strings**: Direct text content (str) - any string not starting with "/" or "file://"
        - **File paths**: Local file paths as strings in these formats:
            * Absolute paths: "/path/to/document.pdf"
            * File URLs: "file:///path/to/document.pdf" or "file://relative/path.txt"
            * S3 paths: "s3://bucket-name/path/to/file.pdf"
        - **Binary file objects**: File handles/streams (BinaryIO)
        - **Lists**: Multiple files or text strings in a single call

    Supported File Formats:
        - Text files (.txt, .md, .csv)
        - PDFs (.pdf)
        - Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
        - Audio files (.mp3, .wav) - transcribed to text
        - Code files (.py, .js, .ts, etc.) - parsed for structure and content
        - Office documents (.docx, .pptx)

    Workflow:
        1. **Delete**: Removes the existing document identified by ``data_id``
        2. **Add**: Ingests the new content into the dataset
        3. **Cognify**: Re-processes the dataset so the knowledge graph reflects the change

    Args:
        data_id: UUID of existing data to update.
        data: The latest version of the data. Can be:
            - Single text string: "Your text content here"
            - Absolute file path: "/path/to/document.pdf"
            - File URL: "file:///absolute/path/to/document.pdf" or "file://relative/path.txt"
            - S3 path: "s3://my-bucket/documents/file.pdf"
            - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
            - Binary file object: open("file.txt", "rb")
        user: User object for authentication and permissions. Uses default user if None.
            Users can only access datasets they have permissions for.
        node_set: Optional list of node identifiers for graph organization and access control.
            Used for grouping related data points in the knowledge graph.
        dataset_id: UUID of the dataset containing the data to update.
        vector_db_config: Optional configuration for vector database (for custom setups).
        graph_db_config: Optional configuration for graph database (for custom setups).
        preferred_loaders: Optional list of loader names to prefer during ingestion.
        incremental_loading: Whether to process the dataset incrementally. Defaults to True.

    Returns:
        PipelineRunInfo: Information about the cognify pipeline execution including:
            - Pipeline run ID for tracking
            - Dataset ID where data was stored
            - Processing status and any errors
            - Execution timestamps and metadata
    """
    # NOTE(review): delete + add is not atomic — if add() fails after delete()
    # succeeds, the original document is lost. Consider a compensating action
    # or transactional ingestion. TODO: confirm desired failure semantics.
    await delete(
        data_id=data_id,
        dataset_id=dataset_id,
        user=user,
    )

    await add(
        data=data,
        dataset_id=dataset_id,
        user=user,
        node_set=node_set,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        preferred_loaders=preferred_loaders,
        incremental_loading=incremental_loading,
    )

    # Re-run cognify on the affected dataset so the graph reflects the new content.
    # NOTE(review): if dataset_id is None this passes [None] downstream — verify
    # that cognify handles that, or require dataset_id at the API boundary.
    cognify_run = await cognify(
        datasets=[dataset_id],
        user=user,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        incremental_loading=incremental_loading,
    )

    return cognify_run