refactor: unify file_path handling across merge and rebuild functions

- Replace direct GRAPH_FIELD_SEP.join() joining of file paths with build_file_path() in:
  - _merge_edges_then_upsert
  - _rebuild_single_entity
  - _rebuild_single_relationship
- Ensure consistent deduplication, length limiting, and error handling
- Align with the existing _merge_nodes_then_upsert implementation
This commit is contained in:
yangdx 2025-07-27 12:37:24 +08:00
parent c6cfbee3e8
commit 99e3812c38

View file

@ -681,7 +681,13 @@ async def _rebuild_single_entity(
"description": final_description, "description": final_description,
"entity_type": entity_type, "entity_type": entity_type,
"source_id": GRAPH_FIELD_SEP.join(chunk_ids), "source_id": GRAPH_FIELD_SEP.join(chunk_ids),
"file_path": GRAPH_FIELD_SEP.join(file_paths) "file_path": build_file_path(
current_entity.get("file_path", "").split(GRAPH_FIELD_SEP)
if current_entity.get("file_path")
else [],
[{"file_path": fp} for fp in file_paths],
entity_name,
)
if file_paths if file_paths
else current_entity.get("file_path", "unknown_source"), else current_entity.get("file_path", "unknown_source"),
} }
@ -894,7 +900,13 @@ async def _rebuild_single_relationship(
"keywords": combined_keywords, "keywords": combined_keywords,
"weight": weight, "weight": weight,
"source_id": GRAPH_FIELD_SEP.join(chunk_ids), "source_id": GRAPH_FIELD_SEP.join(chunk_ids),
"file_path": GRAPH_FIELD_SEP.join([fp for fp in file_paths if fp]) "file_path": build_file_path(
current_relationship.get("file_path", "").split(GRAPH_FIELD_SEP)
if current_relationship.get("file_path")
else [],
[{"file_path": fp} for fp in file_paths if fp],
f"{src}-{tgt}",
)
if file_paths if file_paths
else current_relationship.get("file_path", "unknown_source"), else current_relationship.get("file_path", "unknown_source"),
} }
@ -1100,12 +1112,7 @@ async def _merge_edges_then_upsert(
+ already_source_ids + already_source_ids
) )
) )
file_path = GRAPH_FIELD_SEP.join( file_path = build_file_path(already_file_paths, edges_data, f"{src_id}-{tgt_id}")
set(
[dp["file_path"] for dp in edges_data if dp.get("file_path")]
+ [fp for fp in already_file_paths if fp]
)
)
for need_insert_id in [src_id, tgt_id]: for need_insert_id in [src_id, tgt_id]:
if not (await knowledge_graph_inst.has_node(need_insert_id)): if not (await knowledge_graph_inst.has_node(need_insert_id)):