feat: Add update endpoint to Cognee [COG-3016] (#1475)
<!-- .github/pull_request_template.md -->
## Description
Add update method and endpoint for Cognee
## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [x] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):
## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the
issue/feature**
- [x] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages
## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
<!-- CURSOR_SUMMARY -->
---
> [!NOTE]
> <sup>[Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) is
generating a summary for commit
9524109029. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
This commit is contained in:
commit
92d183aedb
5 changed files with 195 additions and 0 deletions
|
|
@ -28,6 +28,7 @@ from cognee.api.v1.add.routers import get_add_router
|
||||||
from cognee.api.v1.delete.routers import get_delete_router
|
from cognee.api.v1.delete.routers import get_delete_router
|
||||||
from cognee.api.v1.responses.routers import get_responses_router
|
from cognee.api.v1.responses.routers import get_responses_router
|
||||||
from cognee.api.v1.sync.routers import get_sync_router
|
from cognee.api.v1.sync.routers import get_sync_router
|
||||||
|
from cognee.api.v1.update.routers import get_update_router
|
||||||
from cognee.api.v1.users.routers import (
|
from cognee.api.v1.users.routers import (
|
||||||
get_auth_router,
|
get_auth_router,
|
||||||
get_register_router,
|
get_register_router,
|
||||||
|
|
@ -263,6 +264,8 @@ app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["vi
|
||||||
|
|
||||||
app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"])
|
app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"])
|
||||||
|
|
||||||
|
app.include_router(get_update_router(), prefix="/api/v1/update", tags=["update"])
|
||||||
|
|
||||||
app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])
|
app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])
|
||||||
|
|
||||||
app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
|
app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
|
||||||
|
|
|
||||||
1
cognee/api/v1/update/__init__.py
Normal file
1
cognee/api/v1/update/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
from .update import update
|
||||||
1
cognee/api/v1/update/routers/__init__.py
Normal file
1
cognee/api/v1/update/routers/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
from .get_update_router import get_update_router
|
||||||
90
cognee/api/v1/update/routers/get_update_router.py
Normal file
90
cognee/api/v1/update/routers/get_update_router.py
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
from fastapi import File, UploadFile, Depends, Form
|
||||||
|
from typing import Optional
|
||||||
|
from fastapi import APIRouter
|
||||||
|
from fastapi.encoders import jsonable_encoder
|
||||||
|
from typing import List
|
||||||
|
from uuid import UUID
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
from cognee.modules.users.models import User
|
||||||
|
from cognee.modules.users.methods import get_authenticated_user
|
||||||
|
from cognee.shared.utils import send_telemetry
|
||||||
|
from cognee.modules.pipelines.models.PipelineRunInfo import (
|
||||||
|
PipelineRunErrored,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def get_update_router() -> APIRouter:
    """Build and return the APIRouter exposing the PATCH /v1/update endpoint."""
    router = APIRouter()

    @router.patch("", response_model=None)
    async def update(
        data_id: UUID,
        dataset_id: UUID,
        data: List[UploadFile] = File(default=None),
        node_set: Optional[List[str]] = Form(default=[""], example=[""]),
        user: User = Depends(get_authenticated_user),
    ):
        """
        Update data in a dataset.

        Replaces the existing document identified by ``data_id`` with the
        uploaded ``data``; the new document is analyzed and its changes are
        integrated into the knowledge graph.

        ## Request Parameters
        - **data_id** (UUID): UUID of the document to update in Cognee memory.
        - **dataset_id** (UUID): UUID of the dataset containing the document.
        - **data** (List[UploadFile]): New version(s) of the document to upload.
        - **node_set** (Optional[List[str]]): Node identifiers for graph
          organization and access control; used for grouping related data
          points in the knowledge graph.

        ## Response
        Returns the pipeline-run information for the update operation.

        ## Error Codes
        - **420**: One or more pipeline runs errored (payload contains the
          run details).
        - **409 Conflict**: Unexpected error during the update operation.
        """
        send_telemetry(
            "Update API Endpoint Invoked",
            user.id,
            additional_properties={
                "endpoint": "PATCH /v1/update",
                "dataset_id": str(dataset_id),
                "data_id": str(data_id),
                "node_set": str(node_set),
            },
        )

        # NOTE(review): imported inside the handler rather than at module
        # level — presumably to avoid a circular import; confirm.
        from cognee.api.v1.update import update as cognee_update

        try:
            update_run = await cognee_update(
                data_id=data_id,
                data=data,
                dataset_id=dataset_id,
                user=user,
                node_set=node_set,
            )

            # If any pipeline run errored, surface it via a non-2xx status
            # code instead of a plain 200 with an error payload.
            if any(isinstance(v, PipelineRunErrored) for v in update_run.values()):
                return JSONResponse(status_code=420, content=jsonable_encoder(update_run))
            return update_run

        except Exception as error:
            # Fixed message: previously logged "Error during deletion by
            # data_id", copy-pasted from the delete endpoint.
            logger.error(f"Error during update by data_id: {str(error)}")
            return JSONResponse(status_code=409, content={"error": str(error)})

    return router
|
||||||
100
cognee/api/v1/update/update.py
Normal file
100
cognee/api/v1/update/update.py
Normal file
|
|
@ -0,0 +1,100 @@
|
||||||
|
from uuid import UUID
|
||||||
|
from typing import Union, BinaryIO, List, Optional
|
||||||
|
|
||||||
|
from cognee.modules.users.models import User
|
||||||
|
from cognee.api.v1.delete import delete
|
||||||
|
from cognee.api.v1.add import add
|
||||||
|
from cognee.api.v1.cognify import cognify
|
||||||
|
|
||||||
|
|
||||||
|
async def update(
    data_id: UUID,
    data: Union[BinaryIO, list[BinaryIO], str, list[str]],
    user: User = None,
    node_set: Optional[List[str]] = None,
    dataset_id: Optional[UUID] = None,
    vector_db_config: Optional[dict] = None,
    graph_db_config: Optional[dict] = None,
    preferred_loaders: Optional[List[str]] = None,
    incremental_loading: bool = True,
):
    """
    Update existing data in Cognee.

    Replaces the document identified by *data_id* with the new *data* and
    re-runs knowledge-graph processing for the dataset.

    Supported Input Types:
        - **Text strings**: Direct text content (str) - any string not starting with "/" or "file://"
        - **File paths**: Local file paths as strings in these formats:
            * Absolute paths: "/path/to/document.pdf"
            * File URLs: "file:///path/to/document.pdf" or "file://relative/path.txt"
            * S3 paths: "s3://bucket-name/path/to/file.pdf"
        - **Binary file objects**: File handles/streams (BinaryIO)
        - **Lists**: Multiple files or text strings in a single call

    Supported File Formats:
        - Text files (.txt, .md, .csv)
        - PDFs (.pdf)
        - Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
        - Audio files (.mp3, .wav) - transcribed to text
        - Code files (.py, .js, .ts, etc.) - parsed for structure and content
        - Office documents (.docx, .pptx)

    Workflow:
        1. **Delete**: Removes the existing document identified by *data_id*
           from the dataset.
        2. **Add**: Ingests the new *data* into the dataset (content
           extraction, storage, metadata tracking, permissions).
        3. **Cognify**: Re-processes the dataset so the changes are
           integrated into the knowledge graph.

    NOTE(review): the delete/add/cognify sequence is not atomic — if the add
    or cognify step fails after the delete succeeds, the original document is
    already gone. Confirm whether compensating/rollback behavior is needed.

    Args:
        data_id: UUID of the existing data to update.
        data: The latest version of the data. Can be:
            - Single text string: "Your text content here"
            - Absolute file path: "/path/to/document.pdf"
            - File URL: "file:///absolute/path/to/document.pdf" or "file://relative/path.txt"
            - S3 path: "s3://my-bucket/documents/file.pdf"
            - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
            - Binary file object: open("file.txt", "rb")
        user: User object for authentication and permissions. Uses the default
            user if None. Users can only access datasets they have permissions
            for.
        node_set: Optional list of node identifiers for graph organization and
            access control. Used for grouping related data points in the
            knowledge graph.
        dataset_id: UUID of the dataset that contains the data to update.
        vector_db_config: Optional configuration for the vector database (for
            custom setups).
        graph_db_config: Optional configuration for the graph database (for
            custom setups).
        preferred_loaders: Optional list of loader names to prefer when
            ingesting the new data (forwarded to ``add``).
        incremental_loading: Whether to process the dataset incrementally
            (forwarded to ``add`` and ``cognify``). Defaults to True.

    Returns:
        PipelineRunInfo: Information about the cognify pipeline execution
        including:
            - Pipeline run ID for tracking
            - Dataset ID where data was stored
            - Processing status and any errors
            - Execution timestamps and metadata
    """
    # Step 1: remove the old version of the document.
    await delete(
        data_id=data_id,
        dataset_id=dataset_id,
        user=user,
    )

    # Step 2: ingest the new version into the same dataset.
    await add(
        data=data,
        dataset_id=dataset_id,
        user=user,
        node_set=node_set,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        preferred_loaders=preferred_loaders,
        incremental_loading=incremental_loading,
    )

    # Step 3: re-run graph processing so the update is reflected in the
    # knowledge graph.
    cognify_run = await cognify(
        datasets=[dataset_id],
        user=user,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        incremental_loading=incremental_loading,
    )

    return cognify_run
|
||||||
Loading…
Add table
Reference in a new issue