Track placeholders in file paths for accurate source count display

• Add has_placeholder tracking variable
• Detect placeholder patterns in paths
• Append a + sign to the full source count when a placeholder was detected
This commit is contained in:
yangdx 2025-10-20 23:48:04 +08:00
parent 637b850ec5
commit e01c998ee9

View file

@ -1579,6 +1579,7 @@ async def _merge_nodes_then_upsert(
truncation_info = ""
dd_message = ""
has_placeholder = False # Initialize to track placeholder in file paths
# Combine already_description with sorted new descriptions
description_list = already_description + sorted_descriptions
@ -1620,7 +1621,15 @@ async def _merge_nodes_then_upsert(
# Add truncation info from apply_source_ids_limit if truncation occurred
if len(source_ids) < len(full_source_ids):
truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}"
# Add + sign if has_placeholder is True, indicating actual file count is higher
full_source_count_str = (
f"{len(full_source_ids)}+"
if has_placeholder
else str(len(full_source_ids))
)
truncation_info = (
f"{limit_method}:{len(source_ids)}/{full_source_count_str}"
)
if dd_message or truncation_info:
status_message += (
@ -1650,6 +1659,7 @@ async def _merge_nodes_then_upsert(
# Collect and apply limit
file_paths_list = []
seen_paths = set()
has_placeholder = False # Track if already_file_paths contains placeholder
# Get placeholder to filter it out
file_path_placeholder = global_config.get(
@ -1658,12 +1668,12 @@ async def _merge_nodes_then_upsert(
# Collect from already_file_paths, excluding placeholder
for fp in already_file_paths:
# Check if this is a placeholder record
if fp and fp.startswith(f"...{file_path_placeholder}"):
has_placeholder = True
continue
# Skip placeholders (format: "...{placeholder}(showing X of Y)...")
if (
fp
and not fp.startswith(f"...{file_path_placeholder}")
and fp not in seen_paths
):
if fp and fp not in seen_paths:
file_paths_list.append(fp)
seen_paths.add(fp)
@ -1862,6 +1872,7 @@ async def _merge_edges_then_upsert(
truncation_info = ""
dd_message = ""
has_placeholder = False # Initialize to track placeholder in file paths
# Combine already_description with sorted new descriptions
description_list = already_description + sorted_descriptions
@ -1906,7 +1917,15 @@ async def _merge_edges_then_upsert(
# Add truncation info from apply_source_ids_limit if truncation occurred
if len(source_ids) < len(full_source_ids):
truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}"
# Add + sign if has_placeholder is True, indicating actual file count is higher
full_source_count_str = (
f"{len(full_source_ids)}+"
if has_placeholder
else str(len(full_source_ids))
)
truncation_info = (
f"{limit_method}:{len(source_ids)}/{full_source_count_str}"
)
if dd_message or truncation_info:
status_message += (
@ -1951,6 +1970,7 @@ async def _merge_edges_then_upsert(
# Collect and apply limit
file_paths_list = []
seen_paths = set()
has_placeholder = False # Track if already_file_paths contains placeholder
# Get placeholder to filter it out
file_path_placeholder = global_config.get(
@ -1959,12 +1979,12 @@ async def _merge_edges_then_upsert(
# Collect from already_file_paths, excluding placeholder
for fp in already_file_paths:
# Check if this is a placeholder record
if fp and fp.startswith(f"...{file_path_placeholder}"):
has_placeholder = True
continue
# Skip placeholders (format: "...{placeholder}(showing X of Y)...")
if (
fp
and not fp.startswith(f"...{file_path_placeholder}")
and fp not in seen_paths
):
if fp and fp not in seen_paths:
file_paths_list.append(fp)
seen_paths.add(fp)