cherry-pick be3d274a
This commit is contained in:
parent
ef2355a7ac
commit
18a8f57b89
1 changed files with 16 additions and 10 deletions
|
|
@ -1026,7 +1026,7 @@ async def _rebuild_single_entity(
|
|||
async def _update_entity_storage(
|
||||
final_description: str,
|
||||
entity_type: str,
|
||||
file_paths: list[str],
|
||||
file_paths: set[str],
|
||||
source_chunk_ids: list[str],
|
||||
truncation_info: str = "",
|
||||
):
|
||||
|
|
@ -1195,6 +1195,8 @@ async def _rebuild_single_entity(
|
|||
f"Limited `{entity_name}`: file_path {original_count} -> {max_file_paths} ({limit_method})"
|
||||
)
|
||||
|
||||
file_paths = set(file_paths_list)
|
||||
|
||||
# Remove duplicates while preserving order
|
||||
description_list = list(dict.fromkeys(descriptions))
|
||||
entity_types = list(dict.fromkeys(entity_types))
|
||||
|
|
@ -1229,7 +1231,7 @@ async def _rebuild_single_entity(
|
|||
await _update_entity_storage(
|
||||
final_description,
|
||||
entity_type,
|
||||
file_paths_list,
|
||||
file_paths,
|
||||
limited_chunk_ids,
|
||||
truncation_info,
|
||||
)
|
||||
|
|
@ -1352,6 +1354,8 @@ async def _rebuild_single_relationship(
|
|||
f"Limited `{src}`~`{tgt}`: file_path {original_count} -> {max_file_paths} ({limit_method})"
|
||||
)
|
||||
|
||||
file_paths = set(file_paths_list)
|
||||
|
||||
# Remove duplicates while preserving order
|
||||
description_list = list(dict.fromkeys(descriptions))
|
||||
keywords = list(dict.fromkeys(keywords))
|
||||
|
|
@ -1394,8 +1398,8 @@ async def _rebuild_single_relationship(
|
|||
"keywords": combined_keywords,
|
||||
"weight": weight,
|
||||
"source_id": GRAPH_FIELD_SEP.join(limited_chunk_ids),
|
||||
"file_path": GRAPH_FIELD_SEP.join([fp for fp in file_paths_list if fp])
|
||||
if file_paths_list
|
||||
"file_path": GRAPH_FIELD_SEP.join([fp for fp in file_paths if fp])
|
||||
if file_paths
|
||||
else current_relationship.get("file_path", "unknown_source"),
|
||||
"truncate": truncation_info,
|
||||
}
|
||||
|
|
@ -1646,12 +1650,11 @@ async def _merge_nodes_then_upsert(
|
|||
if limit_method == SOURCE_IDS_LIMIT_METHOD_FIFO:
|
||||
# FIFO: keep tail (newest), discard head
|
||||
file_paths_list = file_paths_list[-max_file_paths:]
|
||||
file_paths_list.append(f"...{file_path_placeholder}...(FIFO)")
|
||||
else:
|
||||
# KEEP: keep head (earliest), discard tail
|
||||
file_paths_list = file_paths_list[:max_file_paths]
|
||||
file_paths_list.append(f"...{file_path_placeholder}...(KEEP Old)")
|
||||
|
||||
file_paths_list.append(f"...{file_path_placeholder}({limit_method})...")
|
||||
logger.info(
|
||||
f"Limited `{entity_name}`: file_path {original_count_str} -> {max_file_paths} ({limit_method})"
|
||||
)
|
||||
|
|
@ -1673,7 +1676,7 @@ async def _merge_nodes_then_upsert(
|
|||
if limit_method == SOURCE_IDS_LIMIT_METHOD_FIFO:
|
||||
truncation_info = truncation_info_log
|
||||
else:
|
||||
truncation_info = "KEEP Old"
|
||||
truncation_info = "Keep Old Chunks"
|
||||
|
||||
deduplicated_num = already_fragment + len(nodes_data) - num_fragment
|
||||
dd_message = ""
|
||||
|
|
@ -1968,12 +1971,15 @@ async def _merge_edges_then_upsert(
|
|||
if limit_method == SOURCE_IDS_LIMIT_METHOD_FIFO:
|
||||
# FIFO: keep tail (newest), discard head
|
||||
file_paths_list = file_paths_list[-max_file_paths:]
|
||||
file_paths_list.append(f"...{file_path_placeholder}...(FIFO)")
|
||||
else:
|
||||
# KEEP: keep head (earliest), discard tail
|
||||
file_paths_list = file_paths_list[:max_file_paths]
|
||||
file_paths_list.append(f"...{file_path_placeholder}...(KEEP Old)")
|
||||
|
||||
# Add + sign if has_placeholder is True, indicating actual file count is higher
|
||||
|
||||
file_paths_list.append(
|
||||
f"...{file_path_placeholder}({limit_method}:{max_file_paths}/{original_count_str})..."
|
||||
)
|
||||
logger.info(
|
||||
f"Limited `{src_id}`~`{tgt_id}`: file_path {original_count_str} -> {max_file_paths} ({limit_method})"
|
||||
)
|
||||
|
|
@ -1995,7 +2001,7 @@ async def _merge_edges_then_upsert(
|
|||
if limit_method == SOURCE_IDS_LIMIT_METHOD_FIFO:
|
||||
truncation_info = truncation_info_log
|
||||
else:
|
||||
truncation_info = "KEEP Old"
|
||||
truncation_info = "Keep Old Chunks"
|
||||
|
||||
deduplicated_num = already_fragment + len(edges_data) - num_fragment
|
||||
dd_message = ""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue