Enhance entity/relation editing with chunk tracking synchronization
• Add chunk storage sync to edit ops
• Implement incremental chunk ID updates
• Support entity renaming migrations
• Normalize relation keys consistently
• Preserve chunk references on edits
(cherry picked from commit 3fbd704bf9)
This commit is contained in:
parent
488f67e5b2
commit
7e0f12c28e
3 changed files with 763 additions and 1083 deletions
|
|
@ -3407,6 +3407,7 @@ class LightRAG:
|
||||||
"""Asynchronously edit entity information.
|
"""Asynchronously edit entity information.
|
||||||
|
|
||||||
Updates entity information in the knowledge graph and re-embeds the entity in the vector database.
|
Updates entity information in the knowledge graph and re-embeds the entity in the vector database.
|
||||||
|
Also synchronizes entity_chunks_storage and relation_chunks_storage to track chunk references.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
entity_name: Name of the entity to edit
|
entity_name: Name of the entity to edit
|
||||||
|
|
@ -3425,6 +3426,8 @@ class LightRAG:
|
||||||
entity_name,
|
entity_name,
|
||||||
updated_data,
|
updated_data,
|
||||||
allow_rename,
|
allow_rename,
|
||||||
|
self.entity_chunks,
|
||||||
|
self.relation_chunks,
|
||||||
)
|
)
|
||||||
|
|
||||||
def edit_entity(
|
def edit_entity(
|
||||||
|
|
@ -3441,6 +3444,7 @@ class LightRAG:
|
||||||
"""Asynchronously edit relation information.
|
"""Asynchronously edit relation information.
|
||||||
|
|
||||||
Updates relation (edge) information in the knowledge graph and re-embeds the relation in the vector database.
|
Updates relation (edge) information in the knowledge graph and re-embeds the relation in the vector database.
|
||||||
|
Also synchronizes the relation_chunks_storage to track which chunks reference this relation.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
source_entity: Name of the source entity
|
source_entity: Name of the source entity
|
||||||
|
|
@ -3459,6 +3463,7 @@ class LightRAG:
|
||||||
source_entity,
|
source_entity,
|
||||||
target_entity,
|
target_entity,
|
||||||
updated_data,
|
updated_data,
|
||||||
|
self.relation_chunks,
|
||||||
)
|
)
|
||||||
|
|
||||||
def edit_relation(
|
def edit_relation(
|
||||||
|
|
|
||||||
|
|
@ -2552,6 +2552,52 @@ def apply_source_ids_limit(
|
||||||
return truncated
|
return truncated
|
||||||
|
|
||||||
|
|
||||||
|
def compute_incremental_chunk_ids(
    existing_full_chunk_ids: list[str],
    old_chunk_ids: list[str],
    new_chunk_ids: list[str],
) -> list[str]:
    """Apply the delta between two source_id chunk lists to a stored chunk list.

    The delta is derived from ``old_chunk_ids`` and ``new_chunk_ids``:

    * IDs present in ``old_chunk_ids`` but absent from ``new_chunk_ids`` are
      removed from ``existing_full_chunk_ids``.
    * IDs present in ``new_chunk_ids`` but absent from ``old_chunk_ids`` are
      appended at the end, in the order they appear in ``new_chunk_ids``,
      unless they are already present in the surviving list.

    Surviving entries keep their original relative order. Newly appended IDs
    are never duplicated; duplicates that already exist inside
    ``existing_full_chunk_ids`` are left untouched. The input lists are not
    modified.

    Args:
        existing_full_chunk_ids: Complete list of existing chunk IDs from storage
        old_chunk_ids: Previous chunk IDs from source_id (chunks being replaced)
        new_chunk_ids: New chunk IDs from updated source_id (chunks being added)

    Returns:
        A new list of chunk IDs with removals applied and additions appended.

    Example:
        >>> existing = ['chunk-1', 'chunk-2', 'chunk-3']
        >>> old = ['chunk-1', 'chunk-2']
        >>> new = ['chunk-2', 'chunk-4']
        >>> compute_incremental_chunk_ids(existing, old, new)
        ['chunk-2', 'chunk-3', 'chunk-4']
    """
    # Calculate changes
    old_ids = set(old_chunk_ids)
    new_ids = set(new_chunk_ids)
    chunks_to_remove = old_ids - new_ids
    chunks_to_add = new_ids - old_ids

    # Step 1: Remove chunks that are no longer needed, keeping stored order
    updated_chunk_ids = [
        cid for cid in existing_full_chunk_ids if cid not in chunks_to_remove
    ]

    # Step 2: Append added chunks (preserving order from new_chunk_ids).
    # A set mirrors the list contents so each membership test is O(1)
    # instead of a linear scan of the growing result list.
    present = set(updated_chunk_ids)
    for cid in new_chunk_ids:
        if cid in chunks_to_add and cid not in present:
            updated_chunk_ids.append(cid)
            present.add(cid)

    return updated_chunk_ids
|
||||||
|
|
||||||
|
|
||||||
def subtract_source_ids(
|
def subtract_source_ids(
|
||||||
source_ids: Iterable[str],
|
source_ids: Iterable[str],
|
||||||
ids_to_remove: Collection[str],
|
ids_to_remove: Collection[str],
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue