From e01c998ee92d930a689fa8655227d4a632d2615b Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 20 Oct 2025 23:48:04 +0800 Subject: [PATCH] Track placeholders in file paths for accurate source count display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Add has_placeholder tracking variable • Detect placeholder patterns in paths • Show + sign for truncated counts --- lightrag/operate.py | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 3e889eb7..a5e168be 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1579,6 +1579,7 @@ async def _merge_nodes_then_upsert( truncation_info = "" dd_message = "" + has_placeholder = False # Initialize to track placeholder in file paths # Combine already_description with sorted new sorted descriptions description_list = already_description + sorted_descriptions @@ -1620,7 +1621,15 @@ async def _merge_nodes_then_upsert( # Add truncation info from apply_source_ids_limit if truncation occurred if len(source_ids) < len(full_source_ids): - truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}" + # Add + sign if has_placeholder is True, indicating actual file count is higher + full_source_count_str = ( + f"{len(full_source_ids)}+" + if has_placeholder + else str(len(full_source_ids)) + ) + truncation_info = ( + f"{limit_method}:{len(source_ids)}/{full_source_count_str}" + ) if dd_message or truncation_info: status_message += ( @@ -1650,6 +1659,7 @@ async def _merge_nodes_then_upsert( # Collect and apply limit file_paths_list = [] seen_paths = set() + has_placeholder = False # Track if already_file_paths contains placeholder # Get placeholder to filter it out file_path_placeholder = global_config.get( @@ -1658,12 +1668,12 @@ async def _merge_nodes_then_upsert( # Collect from already_file_paths, excluding placeholder for fp in already_file_paths: + # Check if this is a placeholder record + if fp and fp.startswith(f"...{file_path_placeholder}"): + has_placeholder = True + continue # Skip placeholders (format: "...{placeholder}(showing X of Y)...") - if ( - fp - and not fp.startswith(f"...{file_path_placeholder}") - and fp not in seen_paths - ): + if fp and fp not in seen_paths: file_paths_list.append(fp) seen_paths.add(fp) @@ -1862,6 +1872,7 @@ async def _merge_edges_then_upsert( truncation_info = "" dd_message = "" + has_placeholder = False # Initialize to track placeholder in file paths # Combine already_description with sorted new descriptions description_list = already_description + sorted_descriptions @@ -1906,7 +1917,15 @@ async def _merge_edges_then_upsert( # Add truncation info from apply_source_ids_limit if truncation occurred if len(source_ids) < len(full_source_ids): - truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}" + # Add + sign if has_placeholder is True, indicating actual file count is higher + full_source_count_str = ( + f"{len(full_source_ids)}+" + if has_placeholder + else str(len(full_source_ids)) + ) + truncation_info = ( + f"{limit_method}:{len(source_ids)}/{full_source_count_str}" + ) if dd_message or truncation_info: status_message += ( @@ -1951,6 +1970,7 @@ async def _merge_edges_then_upsert( # Collect and apply limit file_paths_list = [] seen_paths = set() + has_placeholder = False # Track if already_file_paths contains placeholder # Get placeholder to filter it out file_path_placeholder = global_config.get( @@ -1959,12 +1979,12 @@ async def _merge_edges_then_upsert( # Collect from already_file_paths, excluding placeholder for fp in already_file_paths: + # Check if this is a placeholder record + if fp and fp.startswith(f"...{file_path_placeholder}"): + has_placeholder = True + continue # Skip placeholders (format: "...{placeholder}(showing X of Y)...") - if ( - fp - and not fp.startswith(f"...{file_path_placeholder}") - and fp not in seen_paths - ): + if fp and fp not in seen_paths: file_paths_list.append(fp) seen_paths.add(fp)