From de2daf65653ce0d06a128aecc8b3faf492c1f74b Mon Sep 17 00:00:00 2001
From: yangdx
Date: Tue, 26 Aug 2025 01:35:50 +0800
Subject: [PATCH] refac: Rename summary_max_tokens to summary_context_size,
 add comprehensive parameter validation for summary configuration

- Update algorithm logic in operate.py for better token management
- Fix health endpoint to use correct parameter names
---
 README-zh.md                       |  9 +++++----
 README.md                          |  3 ++-
 env.example                        | 11 ++++++-----
 lightrag/api/config.py             | 13 +++++++++++--
 lightrag/api/lightrag_server.py    | 13 ++++++++-----
 lightrag/api/utils_api.py          |  4 ++--
 lightrag/constants.py              |  6 ++++--
 lightrag/lightrag.py               | 21 +++++++++++++++++++++
 lightrag/operate.py                | 24 +++++++++++++-----------
 lightrag_webui/src/api/lightrag.ts |  1 -
 10 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/README-zh.md b/README-zh.md
index 8b7239ec..d3403b35 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -268,7 +268,8 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | 最大并发异步嵌入进程数 | `16` |
 | **llm_model_func** | `callable` | LLM生成的函数 | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | 用于生成的LLM模型名称 | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | 生成实体关系摘要时送给LLM的最大令牌数 | `30000`(由环境变量 SUMMARY_MAX_TOKENS 设置) |
+| **summary_context_size** | `int` | 合并实体关系摘要时送给LLM的最大令牌数 | `10000`(由环境变量 SUMMARY_CONTEXT_SIZE 设置) |
+| **summary_max_tokens** | `int` | 合并实体关系描述的最大令牌数 | `500`(由环境变量 SUMMARY_MAX_TOKENS 设置) |
 | **llm_model_max_async** | `int` | 最大并发异步LLM进程数 | `4`(默认值由环境变量MAX_ASYNC更改) |
 | **llm_model_kwargs** | `dict` | LLM生成的附加参数 | |
 | **vector_db_storage_cls_kwargs** | `dict` | 向量数据库的附加参数,如设置节点和关系检索的阈值 | cosine_better_than_threshold: 0.2(默认值由环境变量COSINE_THRESHOLD更改) |
@@ -598,9 +599,9 @@ if __name__ == "__main__":
 
 为了提高检索质量,可以根据更有效的相关性评分模型对文档进行重排序。`rerank.py`文件提供了三个Reranker提供商的驱动函数:
 
-* **Cohere / vLLM**: `cohere_rerank` 
-* **Jina AI**: `jina_rerank` 
-* **Aliyun阿里云**: `ali_rerank` 
+* **Cohere / vLLM**: `cohere_rerank`
+* **Jina AI**: `jina_rerank`
+* **Aliyun阿里云**: `ali_rerank`
 
 您可以将这些函数之一注入到LightRAG对象的`rerank_model_func`属性中。这将使LightRAG的查询功能能够使用注入的函数对检索到的文本块进行重新排序。有关详细用法,请参阅`examples/rerank_example.py`文件。

diff --git a/README.md b/README.md
index eacb4982..5ad37f01 100644
--- a/README.md
+++ b/README.md
@@ -275,7 +275,8 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **summary_max_tokens** | `int` | Maximum tokens send to LLM to generate entity relation summaries | `30000`(configured by env var SUMMARY_MAX_TOKENS) |
+| **summary_context_size** | `int` | Maximum tokens sent to LLM when generating summaries for entity/relation merging | `10000`(configured by env var SUMMARY_CONTEXT_SIZE) |
+| **summary_max_tokens** | `int` | Maximum token size of an entity/relation description | `500`(configured by env var SUMMARY_MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4`(default value changed by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) |
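The README tables above rename the init parameters but show no call site. A minimal usage sketch follows; the `gpt_4o_mini_complete` import path is an assumption, and the embedding setup that a real LightRAG instance needs is omitted for brevity:

```python
# Sketch only: constructor arguments as documented in the README table above.
from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete  # assumed import path

rag = LightRAG(
    working_dir="./rag_storage",  # embedding_func etc. omitted for brevity
    llm_model_func=gpt_4o_mini_complete,
    summary_context_size=10000,  # max tokens sent to LLM per summary request
    summary_max_tokens=500,      # max tokens kept per entity/relation description
)
```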
diff --git a/env.example b/env.example
index 41c77ede..a824a1f5 100644
--- a/env.example
+++ b/env.example
@@ -125,12 +125,13 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
-### Entity and relation summarization configuration
-### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommented), and max tokens send to LLM
+
+### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
 # FORCE_LLM_SUMMARY_ON_MERGE=4
-# SUMMARY_MAX_TOKENS=30000
-### Maximum number of entity extraction attempts for ambiguous content
-# MAX_GLEANING=1
+### Max token size of merged entity/relation descriptions to trigger LLM summary
+# SUMMARY_MAX_TOKENS=500
+### Maximum context size sent to LLM for description summary
+# SUMMARY_CONTEXT_SIZE=10000
 
 ###############################
 ### Concurrency Configuration

diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index a5e352dc..f4a281a7 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -30,6 +30,7 @@ from lightrag.constants import (
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
     DEFAULT_MAX_ASYNC,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_SUMMARY_CONTEXT_SIZE,
     DEFAULT_SUMMARY_LANGUAGE,
     DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
     DEFAULT_EMBEDDING_BATCH_NUM,
@@ -119,10 +120,18 @@ def parse_args() -> argparse.Namespace:
         help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
     )
     parser.add_argument(
-        "--max-tokens",
+        "--summary-max-tokens",
         type=int,
         default=get_env_value("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
-        help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
+        help=f"Maximum token size for entity/relation summary (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
+    )
+    parser.add_argument(
+        "--summary-context-size",
+        type=int,
+        default=get_env_value(
+            "SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE, int
+        ),
+        help=f"LLM summary context size (default: from env or {DEFAULT_SUMMARY_CONTEXT_SIZE})",
     )
 
     # Logging configuration

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index ec1d38d5..2cb53fcd 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -2,7 +2,7 @@
 LightRAG FastAPI Server
 """
 
-from fastapi import FastAPI, Depends, HTTPException, status
+from fastapi import FastAPI, Depends, HTTPException
 import asyncio
 import os
 import logging
@@ -472,7 +472,8 @@ def create_app(args):
             ),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs=(
@@ -510,7 +511,8 @@
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            summary_max_tokens=args.max_tokens,
+            summary_max_tokens=args.summary_max_tokens,
+            summary_context_size=args.summary_context_size,
             embedding_func=embedding_func,
             kv_storage=args.kv_storage,
             graph_storage=args.graph_storage,
@@ -598,7 +600,7 @@ def create_app(args):
             username = form_data.username
             if auth_handler.accounts.get(username) != form_data.password:
                 raise HTTPException(
-                    status_code=status.HTTP_401_UNAUTHORIZED, detail="Incorrect credentials"
+                    status_code=401, detail="Incorrect credentials"
                 )
 
             # Regular user login
@@ -642,7 +644,8 @@
         "embedding_binding": args.embedding_binding,
         "embedding_binding_host": args.embedding_binding_host,
         "embedding_model": args.embedding_model,
-        "max_tokens": args.max_tokens,
+        "summary_max_tokens": args.summary_max_tokens,
+        "summary_context_size": args.summary_context_size,
         "kv_storage": args.kv_storage,
         "doc_status_storage": args.doc_status_storage,
         "graph_storage": args.graph_storage,
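Both new flags resolve their defaults through `get_env_value`, so the precedence is CLI flag, then environment variable, then constant. A simplified stand-in for that helper (the real implementation lives in `lightrag/api/config.py`; this version only mirrors the three-argument calls used above):

```python
import os
from typing import Callable, TypeVar

T = TypeVar("T")


def get_env_value(name: str, default: T, cast: Callable[[str], T] = str) -> T:
    """Simplified stand-in: read an env var and cast it, else use the default."""
    raw = os.environ.get(name)
    return cast(raw) if raw is not None else default


# Mirrors the argparse defaults added in this patch:
summary_max_tokens = get_env_value("SUMMARY_MAX_TOKENS", 500, int)
summary_context_size = get_env_value("SUMMARY_CONTEXT_SIZE", 10000, int)
```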
detail="Incorrect credentials" ) # Regular user login @@ -642,7 +644,8 @@ def create_app(args): "embedding_binding": args.embedding_binding, "embedding_binding_host": args.embedding_binding_host, "embedding_model": args.embedding_model, - "max_tokens": args.max_tokens, + "summary_max_tokens": args.summary_max_tokens, + "summary_context_size": args.summary_context_size, "kv_storage": args.kv_storage, "doc_status_storage": args.doc_status_storage, "graph_storage": args.graph_storage, diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py index fc05716c..a53f8bee 100644 --- a/lightrag/api/utils_api.py +++ b/lightrag/api/utils_api.py @@ -242,8 +242,8 @@ def display_splash_screen(args: argparse.Namespace) -> None: ASCIIColors.yellow(f"{args.llm_model}") ASCIIColors.white(" ├─ Max Async for LLM: ", end="") ASCIIColors.yellow(f"{args.max_async}") - ASCIIColors.white(" ├─ Max Tokens: ", end="") - ASCIIColors.yellow(f"{args.max_tokens}") + ASCIIColors.white(" ├─ Summary Context Size: ", end="") + ASCIIColors.yellow(f"{args.summary_context_size}") ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="") ASCIIColors.yellow(f"{args.enable_llm_cache}") ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="") diff --git a/lightrag/constants.py b/lightrag/constants.py index 9445872e..c180e2dd 100644 --- a/lightrag/constants.py +++ b/lightrag/constants.py @@ -12,9 +12,11 @@ DEFAULT_MAX_GRAPH_NODES = 1000 # Default values for extraction settings DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for summaries -DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4 DEFAULT_MAX_GLEANING = 1 -DEFAULT_SUMMARY_MAX_TOKENS = 30000 # Default maximum token size + +DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4 +DEFAULT_SUMMARY_MAX_TOKENS = 500 # Max token size for entity/relation summary +DEFAULT_SUMMARY_CONTEXT_SIZE = 10000 # Default maximum token size # Separator for graph fields GRAPH_FIELD_SEP = "" diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index fa529784..1d8c08ed 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -34,6 +34,7 @@ from lightrag.constants import ( DEFAULT_KG_CHUNK_PICK_METHOD, DEFAULT_MIN_RERANK_SCORE, DEFAULT_SUMMARY_MAX_TOKENS, + DEFAULT_SUMMARY_CONTEXT_SIZE, DEFAULT_MAX_ASYNC, DEFAULT_MAX_PARALLEL_INSERT, DEFAULT_MAX_GRAPH_NODES, @@ -285,6 +286,11 @@ class LightRAG: summary_max_tokens: int = field( default=int(os.getenv("SUMMARY_MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS)) ) + """Maximum tokens allowed for entity/relation description.""" + + summary_context_size: int = field( + default=int(os.getenv("SUMMARY_CONTEXT_SIZE", DEFAULT_SUMMARY_CONTEXT_SIZE)) + ) """Maximum number of tokens allowed per LLM response.""" llm_model_max_async: int = field( @@ -416,6 +422,21 @@ class LightRAG: if self.ollama_server_infos is None: self.ollama_server_infos = OllamaServerInfos() + + # Validate config + if self.force_llm_summary_on_merge < 3: + logger.warning( + f"force_llm_summary_on_merge should be at least 3, got {self.force_llm_summary_on_merge}" + ) + if self.summary_max_tokens * self.force_llm_summary_on_merge > self.summary_context_size: + logger.warning( + f"summary_context_size must be at least summary_max_tokens * force_llm_summary_on_merge, got {self.summary_context_size}" + ) + if self.summary_context_size > self.max_total_tokens: + logger.warning( + f"summary_context_size must be less than max_total_tokens, got {self.summary_context_size}" + ) + # Fix global_config now global_config = asdict(self) diff --git a/lightrag/operate.py b/lightrag/operate.py index 
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 5ad573cc..e59e944d 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -124,10 +124,11 @@ async def _handle_entity_relation_summary(
     """Handle entity relation description summary using map-reduce approach.
 
     This function summarizes a list of descriptions using a map-reduce strategy:
-    1. If total tokens <= summary_max_tokens, summarize directly
-    2. Otherwise, split descriptions into chunks that fit within token limits
-    3. Summarize each chunk, then recursively process the summaries
-    4. Continue until we get a final summary within token limits or num of descriptions is less than force_llm_summary_on_merge
+    1. If total tokens < summary_max_tokens and len(description_list) < force_llm_summary_on_merge, no need to summarize
+    2. If total tokens <= summary_context_size, summarize with LLM directly
+    3. Otherwise, split descriptions into chunks that fit within token limits
+    4. Summarize each chunk, then recursively process the summaries
+    5. Continue until we get a final summary within token limits or the number of descriptions is less than force_llm_summary_on_merge
 
     Args:
         entity_or_relation_name: Name of the entity or relation being summarized
@@ -148,6 +149,7 @@
 
     # Get configuration
     tokenizer: Tokenizer = global_config["tokenizer"]
+    summary_context_size = global_config["summary_context_size"]
     summary_max_tokens = global_config["summary_max_tokens"]
 
     current_list = description_list[:]  # Copy the list to avoid modifying original
@@ -158,11 +160,11 @@
         total_tokens = sum(len(tokenizer.encode(desc)) for desc in current_list)
 
         # If total length is within limits, perform final summarization
-        if (
-            total_tokens <= summary_max_tokens
-            or len(current_list) < force_llm_summary_on_merge
-        ):
-            if len(current_list) < force_llm_summary_on_merge:
+        if total_tokens <= summary_context_size:
+            if (
+                len(current_list) < force_llm_summary_on_merge
+                and total_tokens < summary_max_tokens
+            ):
                 # Already the final result
                 final_description = seperator.join(current_list)
                 return final_description if final_description else ""
@@ -184,9 +186,9 @@
             desc_tokens = len(tokenizer.encode(desc))
 
             # If adding current description would exceed limit, finalize current chunk
-            if current_tokens + desc_tokens > summary_max_tokens and current_chunk:
+            if current_tokens + desc_tokens > summary_context_size and current_chunk:
                 chunks.append(current_chunk)
-                current_chunk = [desc]
+                current_chunk = [desc]  # Initial chunk for next group
                 current_tokens = desc_tokens
             else:
                 current_chunk.append(desc)

diff --git a/lightrag_webui/src/api/lightrag.ts b/lightrag_webui/src/api/lightrag.ts
index d2f23f12..265126c7 100644
--- a/lightrag_webui/src/api/lightrag.ts
+++ b/lightrag_webui/src/api/lightrag.ts
@@ -35,7 +35,6 @@ export type LightragStatus = {
   embedding_binding: string
   embedding_binding_host: string
   embedding_model: string
-  max_tokens: number
   kv_storage: string
   doc_status_storage: string
   graph_storage: string
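The chunk-splitting loop changed above is the map step of the summarization strategy. A self-contained sketch of the same greedy packing, with whitespace token counts standing in for the repo's `Tokenizer` (function name and tokenizer are stand-ins, not patch code):

```python
def split_by_token_budget(descriptions: list[str], budget: int) -> list[list[str]]:
    """Greedy packing as in _handle_entity_relation_summary: start a new
    chunk whenever adding the next description would exceed the budget."""
    chunks: list[list[str]] = []
    current_chunk: list[str] = []
    current_tokens = 0
    for desc in descriptions:
        desc_tokens = len(desc.split())  # stand-in for tokenizer.encode()
        if current_tokens + desc_tokens > budget and current_chunk:
            chunks.append(current_chunk)
            current_chunk = [desc]  # initial chunk for the next group
            current_tokens = desc_tokens
        else:
            current_chunk.append(desc)
            current_tokens += desc_tokens
    if current_chunk:
        chunks.append(current_chunk)
    return chunks


# Each chunk is summarized by the LLM, and the summaries are re-packed until
# the result fits within summary_context_size:
print(split_by_token_budget(["a b c", "d e", "f g h i"], budget=5))
# -> [['a b c', 'd e'], ['f g h i']]
```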