From 568669ba909520dde8fc06941891ec0a08c4a7b7 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 26 Oct 2025 01:34:34 +0800 Subject: [PATCH] Fix relation chunk merging to handle existing storage data - Check storage for existing relations - Merge chunks from all sources - Prevent data loss during entity merge - Update chunk counts and timestamps --- lightrag/utils_graph.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/lightrag/utils_graph.py b/lightrag/utils_graph.py index 1caaec13..1ea60600 100644 --- a/lightrag/utils_graph.py +++ b/lightrag/utils_graph.py @@ -1096,7 +1096,7 @@ async def amerge_entities( old_key ) if old_chunks_data: - # Check if new key already has data (merge case) + # Check if new key already has data in temporary dict (merge case) if new_key in new_relation_chunks_data: existing_chunks = set( new_relation_chunks_data[new_key].get( @@ -1111,7 +1111,27 @@ async def amerge_entities( "updated_at": int(time.time()), } else: - new_relation_chunks_data[new_key] = old_chunks_data + # Check if new key exists in storage (target entity already has this relation) + existing_storage_data = ( + await relation_chunks_storage.get_by_id(new_key) + ) + if existing_storage_data: + # Merge with existing storage data + existing_chunks = set( + existing_storage_data.get("chunk_ids", []) + ) + new_chunks = set( + old_chunks_data.get("chunk_ids", []) + ) + merged_chunks = existing_chunks.union(new_chunks) + new_relation_chunks_data[new_key] = { + "chunk_ids": list(merged_chunks), + "count": len(merged_chunks), + "updated_at": int(time.time()), + } + else: + # No existing data, use source data directly + new_relation_chunks_data[new_key] = old_chunks_data old_relation_keys_to_delete.append(old_key) if new_relation_chunks_data: