From a809245aed58fe68a1c1a75e0d63d7db7f327978 Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 21 Oct 2025 18:57:54 +0800 Subject: [PATCH] Preserve file path order by using lists instead of sets --- lightrag/operate.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index d4a86977..b3adb67d 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1026,7 +1026,7 @@ async def _rebuild_single_entity( async def _update_entity_storage( final_description: str, entity_type: str, - file_paths: set[str], + file_paths: list[str], source_chunk_ids: list[str], truncation_info: str = "", ): @@ -1195,8 +1195,6 @@ async def _rebuild_single_entity( f"Limited `{entity_name}`: file_path {original_count} -> {max_file_paths} ({limit_method})" ) - file_paths = set(file_paths_list) - # Remove duplicates while preserving order description_list = list(dict.fromkeys(descriptions)) entity_types = list(dict.fromkeys(entity_types)) @@ -1231,7 +1229,7 @@ async def _rebuild_single_entity( await _update_entity_storage( final_description, entity_type, - file_paths, + file_paths_list, limited_chunk_ids, truncation_info, ) @@ -1354,8 +1352,6 @@ async def _rebuild_single_relationship( f"Limited `{src}`~`{tgt}`: file_path {original_count} -> {max_file_paths} ({limit_method})" ) - file_paths = set(file_paths_list) - # Remove duplicates while preserving order description_list = list(dict.fromkeys(descriptions)) keywords = list(dict.fromkeys(keywords)) @@ -1398,8 +1394,8 @@ async def _rebuild_single_relationship( "keywords": combined_keywords, "weight": weight, "source_id": GRAPH_FIELD_SEP.join(limited_chunk_ids), - "file_path": GRAPH_FIELD_SEP.join([fp for fp in file_paths if fp]) - if file_paths + "file_path": GRAPH_FIELD_SEP.join([fp for fp in file_paths_list if fp]) + if file_paths_list else current_relationship.get("file_path", "unknown_source"), "truncate": truncation_info, }