Enhance entity/relation editing with chunk tracking synchronization

• Add chunk storage sync to edit ops • Implement incremental chunk ID updates • Support entity renaming migrations • Normalize relation keys consistently • Preserve chunk references on edits (cherry picked from commit 3fbd704bf9)
2025-10-26 14:34:56 +08:00 · 2025-10-26 14:34:56 +08:00 · 7e0f12c28e
commit 7e0f12c28e
parent 488f67e5b2
3 changed files with 763 additions and 1083 deletions
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@ -3407,6 +3407,7 @@ class LightRAG:
        """Asynchronously edit entity information.

        Updates entity information in the knowledge graph and re-embeds the entity in the vector database.
+        Also synchronizes entity_chunks_storage and relation_chunks_storage to track chunk references.

        Args:
            entity_name: Name of the entity to edit
@ -3425,6 +3426,8 @@ class LightRAG:
            entity_name,
            updated_data,
            allow_rename,
+            self.entity_chunks,
+            self.relation_chunks,
        )

    def edit_entity(
@ -3441,6 +3444,7 @@ class LightRAG:
        """Asynchronously edit relation information.

        Updates relation (edge) information in the knowledge graph and re-embeds the relation in the vector database.
+        Also synchronizes the relation_chunks_storage to track which chunks reference this relation.

        Args:
            source_entity: Name of the source entity
@ -3459,6 +3463,7 @@ class LightRAG:
            source_entity,
            target_entity,
            updated_data,
+            self.relation_chunks,
        )

    def edit_relation(
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@ -2552,6 +2552,52 @@ def apply_source_ids_limit(
    return truncated


+def compute_incremental_chunk_ids(
+    existing_full_chunk_ids: list[str],
+    old_chunk_ids: list[str],
+    new_chunk_ids: list[str],
+) -> list[str]:
+    """
+    Compute incrementally updated chunk IDs based on changes.
+
+    This function applies delta changes (additions and removals) to an existing
+    list of chunk IDs; surviving IDs keep their order and present IDs are not re-added.
+    Delta additions from new_chunk_ids are placed at the end, in their given order.
+
+    Args:
+        existing_full_chunk_ids: Complete list of existing chunk IDs from storage
+        old_chunk_ids: Previous chunk IDs from source_id (chunks being replaced)
+        new_chunk_ids: New chunk IDs from updated source_id (chunks being added)
+
+    Returns:
+        Updated list of chunk IDs with deduplication
+
+    Example:
+        >>> existing = ['chunk-1', 'chunk-2', 'chunk-3']
+        >>> old = ['chunk-1', 'chunk-2']
+        >>> new = ['chunk-2', 'chunk-4']
+        >>> compute_incremental_chunk_ids(existing, old, new)
+        ['chunk-2', 'chunk-3', 'chunk-4']
+    """
+    # Calculate changes
+    chunks_to_remove = set(old_chunk_ids) - set(new_chunk_ids)
+    chunks_to_add = set(new_chunk_ids) - set(old_chunk_ids)
+
+    # Apply changes to full chunk_ids
+    # Step 1: Remove chunks that are no longer needed
+    updated_chunk_ids = [
+        cid for cid in existing_full_chunk_ids if cid not in chunks_to_remove
+    ]
+
+    # Step 2: Add new chunks (preserving order from new_chunk_ids)
+    # Note: 'cid not in updated_chunk_ids' check ensures deduplication
+    for cid in new_chunk_ids:
+        if cid in chunks_to_add and cid not in updated_chunk_ids:
+            updated_chunk_ids.append(cid)
+
+    return updated_chunk_ids
+
+
 def subtract_source_ids(
    source_ids: Iterable[str],
    ids_to_remove: Collection[str],
--- a/lightrag/utils_graph.py
+++ b/lightrag/utils_graph.py