From e01c998ee92d930a689fa8655227d4a632d2615b Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Mon, 20 Oct 2025 23:48:04 +0800
Subject: [PATCH] Track placeholders in file paths for accurate source count
 display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Add has_placeholder tracking variable
• Detect placeholder patterns in paths
• Show + sign for truncated counts
---
 lightrag/operate.py | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 3e889eb7..a5e168be 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1579,6 +1579,7 @@ async def _merge_nodes_then_upsert(
 
     truncation_info = ""
     dd_message = ""
+    has_placeholder = False  # NOTE(review): first read precedes any True assignment
 
     # Combine already_description with sorted new sorted descriptions
     description_list = already_description + sorted_descriptions
@@ -1620,7 +1621,15 @@ async def _merge_nodes_then_upsert(
 
         # Add truncation info from apply_source_ids_limit if truncation occurred
         if len(source_ids) < len(full_source_ids):
-            truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}"
+            # Append "+" to the total when has_placeholder is set (true total is higher)
+            full_source_count_str = (
+                f"{len(full_source_ids)}+"
+                if has_placeholder
+                else str(len(full_source_ids))
+            )
+            truncation_info = (
+                f"{limit_method}:{len(source_ids)}/{full_source_count_str}"
+            )
 
         if dd_message or truncation_info:
             status_message += (
@@ -1650,6 +1659,7 @@ async def _merge_nodes_then_upsert(
         # Collect and apply limit
         file_paths_list = []
         seen_paths = set()
+        has_placeholder = False  # Track if already_file_paths contains placeholder
 
         # Get placeholder to filter it out
         file_path_placeholder = global_config.get(
@@ -1658,12 +1668,12 @@ async def _merge_nodes_then_upsert(
 
         # Collect from already_file_paths, excluding placeholder
         for fp in already_file_paths:
+            # Placeholder record: remember it was seen, but do not collect it
+            if fp and fp.startswith(f"...{file_path_placeholder}"):
+                has_placeholder = True
+                continue
             # Skip placeholders (format: "...{placeholder}(showing X of Y)...")
-            if (
-                fp
-                and not fp.startswith(f"...{file_path_placeholder}")
-                and fp not in seen_paths
-            ):
+            if fp and fp not in seen_paths:
                 file_paths_list.append(fp)
                 seen_paths.add(fp)
 
@@ -1862,6 +1872,7 @@ async def _merge_edges_then_upsert(
 
     truncation_info = ""
     dd_message = ""
+    has_placeholder = False  # NOTE(review): first read precedes any True assignment
 
     # Combine already_description with sorted new descriptions
     description_list = already_description + sorted_descriptions
@@ -1906,7 +1917,15 @@ async def _merge_edges_then_upsert(
 
         # Add truncation info from apply_source_ids_limit if truncation occurred
         if len(source_ids) < len(full_source_ids):
-            truncation_info = f"{limit_method}:{len(source_ids)}/{len(full_source_ids)}"
+            # Append "+" to the total when has_placeholder is set (true total is higher)
+            full_source_count_str = (
+                f"{len(full_source_ids)}+"
+                if has_placeholder
+                else str(len(full_source_ids))
+            )
+            truncation_info = (
+                f"{limit_method}:{len(source_ids)}/{full_source_count_str}"
+            )
 
         if dd_message or truncation_info:
             status_message += (
@@ -1951,6 +1970,7 @@ async def _merge_edges_then_upsert(
         # Collect and apply limit
         file_paths_list = []
         seen_paths = set()
+        has_placeholder = False  # Track if already_file_paths contains placeholder
 
         # Get placeholder to filter it out
         file_path_placeholder = global_config.get(
@@ -1959,12 +1979,12 @@ async def _merge_edges_then_upsert(
 
         # Collect from already_file_paths, excluding placeholder
         for fp in already_file_paths:
+            # Check if this is a placeholder record
+            if fp and fp.startswith(f"...{file_path_placeholder}"):
+                has_placeholder = True
+                continue
             # Skip placeholders (format: "...{placeholder}(showing X of Y)...")
-            if (
-                fp
-                and not fp.startswith(f"...{file_path_placeholder}")
-                and fp not in seen_paths
-            ):
+            if fp and fp not in seen_paths:
                 file_paths_list.append(fp)
                 seen_paths.add(fp)