From 4ce823b4dd391af26ac66b9f8edf18680b9d3b90 Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Thu, 11 Sep 2025 18:58:37 +0800
Subject: [PATCH] Handle empty context in mix mode and improve query logging

---
 lightrag/operate.py | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 8872ad44..77afbbf0 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -891,16 +891,18 @@ async def _process_extraction_result(
         # <XX|SEP|YY> -> <|SEP|> (handles extra characters)
         # |SEP|> -> <|SEP|> (where left | is missing)
         # <|SEP| -> <|SEP|> (where right | is missing)
-        
-        escaped_delimiter_core = re.escape(tuple_delimiter[2:-2])  # Extract "SEP" from "<|SEP|>"
-        
+
+        escaped_delimiter_core = re.escape(
+            tuple_delimiter[2:-2]
+        )  # Extract "SEP" from "<|SEP|>"
+
         # Fix: <SEP> -> <|SEP|> (missing pipes)
         record = re.sub(
             rf"<{escaped_delimiter_core}>",
             tuple_delimiter,
             record,
         )
-        
+
         # Fix: <SEP|> -> <|SEP|> (missing left pipe only)
         record = re.sub(
             rf"<{escaped_delimiter_core}\|>",
@@ -942,7 +944,7 @@ async def _process_extraction_result(
             tuple_delimiter,
             record,
         )
-        
+
         # Fix: <|SEP| -> <|SEP|> (missing closing >)
         record = re.sub(
             rf"<\|{escaped_delimiter_core}\|(?!>)",
@@ -2058,7 +2060,7 @@ async def extract_entities(
             completion_delimiter=context_base["completion_delimiter"],
         )
 
-        # Process additional gleaning results
+        # Run a single additional gleaning pass when entity_extract_max_gleaning is greater than zero.
         if entity_extract_max_gleaning > 0:
             glean_result = await use_llm_func_with_cache(
                 entity_continue_extraction_user_prompt,
@@ -2510,12 +2512,15 @@ async def _get_vector_context(
     try:
         # Use chunk_top_k if specified, otherwise fall back to top_k
         search_top_k = query_param.chunk_top_k or query_param.top_k
+        cosine_threshold = chunks_vdb.cosine_better_than_threshold
 
         results = await chunks_vdb.query(
             query, top_k=search_top_k, query_embedding=query_embedding
         )
         if not results:
-            logger.info(f"Naive query: 0 chunks (chunk_top_k: {search_top_k})")
+            logger.info(
+                f"Naive query: 0 chunks (chunk_top_k:{search_top_k} cosine:{cosine_threshold})"
+            )
             return []
 
         valid_chunks = []
@@ -2531,7 +2536,7 @@ async def _get_vector_context(
                 valid_chunks.append(chunk_with_metadata)
 
         logger.info(
-            f"Naive query: {len(valid_chunks)} chunks (chunk_top_k: {search_top_k})"
+            f"Naive query: {len(valid_chunks)} chunks (chunk_top_k:{search_top_k} cosine:{cosine_threshold})"
         )
         return valid_chunks
 
@@ -3398,7 +3403,11 @@ async def _build_query_context(
     )
 
     if not search_result["final_entities"] and not search_result["final_relations"]:
-        return None
+        if query_param.mode != "mix":
+            return None
+        else:
+            if not search_result["chunk_tracking"]:
+                return None
 
     # Stage 2: Apply token truncation for LLM efficiency
     truncation_result = await _apply_token_truncation(
@@ -3420,6 +3429,13 @@ async def _build_query_context(
         chunk_tracking=search_result["chunk_tracking"],
     )
 
+    if (
+        not merged_chunks
+        and not truncation_result["entities_context"]
+        and not truncation_result["relations_context"]
+    ):
+        return None
+
     # Stage 4: Build final LLM context with dynamic token processing
     context = await _build_llm_context(
         entities_context=truncation_result["entities_context"],