diff --git a/lightrag/operate.py b/lightrag/operate.py index 3e889eb7..a5e168be 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1579,6 +1579,7 @@ async def _merge_nodes_then_upsert( truncation_info = "" dd_message = "" + has_placeholder = False # Initialize to track placeholder in file paths # Combine already_description with sorted new sorted descriptions description_list = already_description + sorted_descriptions @@ -1620,7 +1621,15 @@ async def _merge_nodes_then_upsert( # Add truncation info from apply_source_ids_limit if truncation occurred if len(source_ids) < len(full_source_ids): - truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}" + # Add + sign if has_placeholder is True, indicating actual file count is higher + full_source_count_str = ( + f"{len(full_source_ids)}+" + if has_placeholder + else str(len(full_source_ids)) + ) + truncation_info = ( + f"{limit_method}:{len(source_ids)}/{full_source_count_str}" + ) if dd_message or truncation_info: status_message += ( @@ -1650,6 +1659,7 @@ async def _merge_nodes_then_upsert( # Collect and apply limit file_paths_list = [] seen_paths = set() + has_placeholder = False # Track if already_file_paths contains placeholder # Get placeholder to filter it out file_path_placeholder = global_config.get( @@ -1658,12 +1668,12 @@ async def _merge_nodes_then_upsert( # Collect from already_file_paths, excluding placeholder for fp in already_file_paths: + # Check if this is a placeholder record + if fp and fp.startswith(f"...{file_path_placeholder}"): + has_placeholder = True + continue # Skip placeholders (format: "...{placeholder}(showing X of Y)...") - if ( - fp - and not fp.startswith(f"...{file_path_placeholder}") - and fp not in seen_paths - ): + if fp and fp not in seen_paths: file_paths_list.append(fp) seen_paths.add(fp) @@ -1862,6 +1872,7 @@ async def _merge_edges_then_upsert( truncation_info = "" dd_message = "" + has_placeholder = False # Initialize to track placeholder in file paths # Combine already_description with sorted new descriptions description_list = already_description + sorted_descriptions @@ -1906,7 +1917,15 @@ async def _merge_edges_then_upsert( # Add truncation info from apply_source_ids_limit if truncation occurred if len(source_ids) < len(full_source_ids): - truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}" + # Add + sign if has_placeholder is True, indicating actual file count is higher + full_source_count_str = ( + f"{len(full_source_ids)}+" + if has_placeholder + else str(len(full_source_ids)) + ) + truncation_info = ( + f"{limit_method}:{len(source_ids)}/{full_source_count_str}" + ) if dd_message or truncation_info: status_message += ( @@ -1951,6 +1970,7 @@ async def _merge_edges_then_upsert( # Collect and apply limit file_paths_list = [] seen_paths = set() + has_placeholder = False # Track if already_file_paths contains placeholder # Get placeholder to filter it out file_path_placeholder = global_config.get( @@ -1959,12 +1979,12 @@ async def _merge_edges_then_upsert( # Collect from already_file_paths, excluding placeholder for fp in already_file_paths: + # Check if this is a placeholder record + if fp and fp.startswith(f"...{file_path_placeholder}"): + has_placeholder = True + continue # Skip placeholders (format: "...{placeholder}(showing X of Y)...") - if ( - fp - and not fp.startswith(f"...{file_path_placeholder}") - and fp not in seen_paths - ): + if fp and fp not in seen_paths: file_paths_list.append(fp) seen_paths.add(fp)