refactor: Add docstring to update function

Igor Ilic 2025-09-29 14:40:41 +02:00
parent f44bb5dabf
commit 4c8e3b8bb3


@@ -18,6 +18,60 @@ async def update(
    preferred_loaders: List[str] = None,
    incremental_loading: bool = True,
):
"""
Update existing data in Cognee.
Supported Input Types:
- **Text strings**: Direct text content (str) - any string not starting with "/" or "file://"
- **File paths**: Local file paths as strings in these formats:
* Absolute paths: "/path/to/document.pdf"
* File URLs: "file:///path/to/document.pdf" or "file://relative/path.txt"
* S3 paths: "s3://bucket-name/path/to/file.pdf"
- **Binary file objects**: File handles/streams (BinaryIO)
- **Lists**: Multiple files or text strings in a single call
Supported File Formats:
- Text files (.txt, .md, .csv)
- PDFs (.pdf)
- Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
- Audio files (.mp3, .wav) - transcribed to text
- Code files (.py, .js, .ts, etc.) - parsed for structure and content
- Office documents (.docx, .pptx)
Workflow:
1. **Data Resolution**: Resolves file paths and validates accessibility
2. **Content Extraction**: Extracts text content from various file formats
3. **Dataset Storage**: Stores processed content in the specified dataset
4. **Metadata Tracking**: Records file metadata, timestamps, and user permissions
5. **Permission Assignment**: Grants user read/write/delete/share permissions on dataset
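
    In code, the replace flow is roughly the following (illustrative sketch:
    only the delete step is visible in this commit; the re-ingestion call is
    assumed from the workflow above):

        await delete(data_id=data_id, dataset_id=dataset_id)
        run_info = await add(data, dataset_name=dataset_name, user=user)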

    Args:
        data_id: UUID of existing data to update
        data: The latest version of the data. Can be:
            - Single text string: "Your text content here"
            - Absolute file path: "/path/to/document.pdf"
            - File URL: "file:///absolute/path/to/document.pdf" or "file://relative/path.txt"
            - S3 path: "s3://my-bucket/documents/file.pdf"
            - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
            - Binary file object: open("file.txt", "rb")
        dataset_name: Name of the dataset to store data in. Defaults to "main_dataset".
            Create separate datasets to organize different knowledge domains.
        user: User object for authentication and permissions. Uses default user if None.
            Default user: "default_user@example.com" (created automatically on first use).
            Users can only access datasets they have permissions for.
        node_set: Optional list of node identifiers for graph organization and access control.
            Used for grouping related data points in the knowledge graph.
        preferred_loaders: Optional list of loader names to prefer during content extraction.
            Defaults to None.
        incremental_loading: Whether to ingest the new data incrementally. Defaults to True.
        vector_db_config: Optional configuration for vector database (for custom setups).
        graph_db_config: Optional configuration for graph database (for custom setups).
        dataset_id: Optional specific dataset UUID to use instead of dataset_name.

    Returns:
        PipelineRunInfo: Information about the ingestion pipeline execution including:
            - Pipeline run ID for tracking
            - Dataset ID where data was stored
            - Processing status and any errors
            - Execution timestamps and metadata
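
    Example:
        A minimal illustrative call from inside an async context (the UUID is
        a placeholder; pass the id of data previously added to Cognee):

            from uuid import UUID

            run_info = await update(
                data_id=UUID("123e4567-e89b-12d3-a456-426614174000"),
                data="Updated text content for the existing record",
                dataset_name="main_dataset",
            )
            print(run_info)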
"""
    await delete(
        data_id=data_id,
        dataset_id=dataset_id,