Preserve file path order by using lists instead of sets

This commit is contained in:
yangdx 2025-10-21 18:57:54 +08:00
parent fe890fca15
commit a809245aed

View file

@@ -1026,7 +1026,7 @@ async def _rebuild_single_entity(
async def _update_entity_storage( async def _update_entity_storage(
final_description: str, final_description: str,
entity_type: str, entity_type: str,
file_paths: set[str], file_paths: list[str],
source_chunk_ids: list[str], source_chunk_ids: list[str],
truncation_info: str = "", truncation_info: str = "",
): ):
@@ -1195,8 +1195,6 @@ async def _rebuild_single_entity(
f"Limited `{entity_name}`: file_path {original_count} -> {max_file_paths} ({limit_method})" f"Limited `{entity_name}`: file_path {original_count} -> {max_file_paths} ({limit_method})"
) )
file_paths = set(file_paths_list)
# Remove duplicates while preserving order # Remove duplicates while preserving order
description_list = list(dict.fromkeys(descriptions)) description_list = list(dict.fromkeys(descriptions))
entity_types = list(dict.fromkeys(entity_types)) entity_types = list(dict.fromkeys(entity_types))
@@ -1231,7 +1229,7 @@ async def _rebuild_single_entity(
await _update_entity_storage( await _update_entity_storage(
final_description, final_description,
entity_type, entity_type,
file_paths, file_paths_list,
limited_chunk_ids, limited_chunk_ids,
truncation_info, truncation_info,
) )
@@ -1354,8 +1352,6 @@ async def _rebuild_single_relationship(
f"Limited `{src}`~`{tgt}`: file_path {original_count} -> {max_file_paths} ({limit_method})" f"Limited `{src}`~`{tgt}`: file_path {original_count} -> {max_file_paths} ({limit_method})"
) )
file_paths = set(file_paths_list)
# Remove duplicates while preserving order # Remove duplicates while preserving order
description_list = list(dict.fromkeys(descriptions)) description_list = list(dict.fromkeys(descriptions))
keywords = list(dict.fromkeys(keywords)) keywords = list(dict.fromkeys(keywords))
@@ -1398,8 +1394,8 @@ async def _rebuild_single_relationship(
"keywords": combined_keywords, "keywords": combined_keywords,
"weight": weight, "weight": weight,
"source_id": GRAPH_FIELD_SEP.join(limited_chunk_ids), "source_id": GRAPH_FIELD_SEP.join(limited_chunk_ids),
"file_path": GRAPH_FIELD_SEP.join([fp for fp in file_paths if fp]) "file_path": GRAPH_FIELD_SEP.join([fp for fp in file_paths_list if fp])
if file_paths if file_paths_list
else current_relationship.get("file_path", "unknown_source"), else current_relationship.get("file_path", "unknown_source"),
"truncate": truncation_info, "truncate": truncation_info,
} }