Enhance entity/relation editing with chunk tracking synchronization
• Add chunk storage sync to edit ops
• Implement incremental chunk ID updates
• Support entity renaming migrations
• Normalize relation keys consistently
• Preserve chunk references on edits
(cherry picked from commit 3fbd704bf9)
This commit is contained in:
parent
488f67e5b2
commit
7e0f12c28e
3 changed files with 763 additions and 1083 deletions
|
|
@ -3407,6 +3407,7 @@ class LightRAG:
|
|||
"""Asynchronously edit entity information.
|
||||
|
||||
Updates entity information in the knowledge graph and re-embeds the entity in the vector database.
|
||||
Also synchronizes entity_chunks_storage and relation_chunks_storage to track chunk references.
|
||||
|
||||
Args:
|
||||
entity_name: Name of the entity to edit
|
||||
|
|
@ -3425,6 +3426,8 @@ class LightRAG:
|
|||
entity_name,
|
||||
updated_data,
|
||||
allow_rename,
|
||||
self.entity_chunks,
|
||||
self.relation_chunks,
|
||||
)
|
||||
|
||||
def edit_entity(
|
||||
|
|
@ -3441,6 +3444,7 @@ class LightRAG:
|
|||
"""Asynchronously edit relation information.
|
||||
|
||||
Updates relation (edge) information in the knowledge graph and re-embeds the relation in the vector database.
|
||||
Also synchronizes the relation_chunks_storage to track which chunks reference this relation.
|
||||
|
||||
Args:
|
||||
source_entity: Name of the source entity
|
||||
|
|
@ -3459,6 +3463,7 @@ class LightRAG:
|
|||
source_entity,
|
||||
target_entity,
|
||||
updated_data,
|
||||
self.relation_chunks,
|
||||
)
|
||||
|
||||
def edit_relation(
|
||||
|
|
|
|||
|
|
@ -2552,6 +2552,52 @@ def apply_source_ids_limit(
|
|||
return truncated
|
||||
|
||||
|
||||
def compute_incremental_chunk_ids(
    existing_full_chunk_ids: list[str],
    old_chunk_ids: list[str],
    new_chunk_ids: list[str],
) -> list[str]:
    """
    Compute incrementally updated chunk IDs based on changes.

    This function applies delta changes (additions and removals) to an existing
    list of chunk IDs while maintaining order and ensuring deduplication.

    Only the *difference* between old_chunk_ids and new_chunk_ids is applied:
    - IDs present in old_chunk_ids but not in new_chunk_ids are removed.
    - IDs present in new_chunk_ids but not in old_chunk_ids are appended at the
      end, in the order they appear in new_chunk_ids.
    - IDs present in both are left untouched: they keep their current position
      if they exist in existing_full_chunk_ids and are not inserted otherwise.

    The input lists are not modified.

    Args:
        existing_full_chunk_ids: Complete list of existing chunk IDs from storage
        old_chunk_ids: Previous chunk IDs from source_id (chunks being replaced)
        new_chunk_ids: New chunk IDs from updated source_id (chunks being added)

    Returns:
        Updated list of chunk IDs with deduplication (first occurrence wins)

    Example:
        >>> existing = ['chunk-1', 'chunk-2', 'chunk-3']
        >>> old = ['chunk-1', 'chunk-2']
        >>> new = ['chunk-2', 'chunk-4']
        >>> compute_incremental_chunk_ids(existing, old, new)
        ['chunk-2', 'chunk-3', 'chunk-4']
    """
    # Calculate the delta between the previous and the updated source IDs.
    old_set = set(old_chunk_ids)
    new_set = set(new_chunk_ids)
    chunks_to_remove = old_set - new_set
    chunks_to_add = new_set - old_set

    # `seen` mirrors the contents of `updated_chunk_ids` so membership checks
    # are O(1) instead of rescanning the list on every iteration.
    seen: set[str] = set()
    updated_chunk_ids: list[str] = []

    # Step 1: Keep existing chunks that are not being removed, dropping any
    # duplicates already present in storage (first occurrence wins).
    for cid in existing_full_chunk_ids:
        if cid in chunks_to_remove or cid in seen:
            continue
        seen.add(cid)
        updated_chunk_ids.append(cid)

    # Step 2: Append genuinely new chunks, preserving order from new_chunk_ids.
    for cid in new_chunk_ids:
        if cid in chunks_to_add and cid not in seen:
            seen.add(cid)
            updated_chunk_ids.append(cid)

    return updated_chunk_ids
|
||||
|
||||
|
||||
def subtract_source_ids(
|
||||
source_ids: Iterable[str],
|
||||
ids_to_remove: Collection[str],
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue