From 598eecd06d9e91df8d0e77ef0637d895dbf97564 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 28 Jul 2025 00:49:08 +0800
Subject: [PATCH] Refactor: Rename llm_model_max_token_size to summary_max_tokens

This commit renames the parameter 'llm_model_max_token_size' to
'summary_max_tokens' for better clarity, as it specifically controls the
token limit for entity relation summaries.
---
 README-zh.md                                            | 2 +-
 README.md                                               | 2 +-
 examples/lightrag_ollama_demo.py                        | 2 +-
 examples/unofficial-sample/lightrag_cloudflare_demo.py  | 2 +-
 .../lightrag_openai_neo4j_milvus_redis_demo.py          | 2 +-
 lightrag/api/config.py                                  | 8 ++++----
 lightrag/api/lightrag_server.py                         | 4 ++--
 lightrag/constants.py                                   | 2 +-
 lightrag/lightrag.py                                    | 5 ++++-
 lightrag/operate.py                                     | 2 +-
 10 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/README-zh.md b/README-zh.md
index 1b5ce4dc..3326d95b 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -265,7 +265,7 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | 最大并发异步嵌入进程数 | `16` |
 | **llm_model_func** | `callable` | LLM生成的函数 | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | 用于生成的LLM模型名称 | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | 生成实体关系摘要时送给LLM的最大令牌数 | `32000`(默认值由环境变量MAX_TOKENS更改) |
+| **summary_max_tokens** | `int` | 生成实体关系摘要时送给LLM的最大令牌数 | `32000`(默认值由环境变量MAX_TOKENS更改) |
 | **llm_model_max_async** | `int` | 最大并发异步LLM进程数 | `4`(默认值由环境变量MAX_ASYNC更改) |
 | **llm_model_kwargs** | `dict` | LLM生成的附加参数 | |
 | **vector_db_storage_cls_kwargs** | `dict` | 向量数据库的附加参数,如设置节点和关系检索的阈值 | cosine_better_than_threshold: 0.2(默认值由环境变量COSINE_THRESHOLD更改) |
diff --git a/README.md b/README.md
index c68665c1..1af28d3b 100644
--- a/README.md
+++ b/README.md
@@ -272,7 +272,7 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | Maximum tokens send to LLM to generate entity relation summaries | `32000`(default value changed by env var MAX_TOKENS) |
+| **summary_max_tokens** | `int` | Maximum tokens sent to LLM to generate entity relation summaries | `32000`(default value changed by env var MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4`(default value changed by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) |
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index b012f685..18fcc790 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -87,7 +87,7 @@ async def initialize_rag():
         working_dir=WORKING_DIR,
         llm_model_func=ollama_model_complete,
         llm_model_name=os.getenv("LLM_MODEL", "qwen2.5-coder:7b"),
-        llm_model_max_token_size=8192,
+        summary_max_tokens=8192,
         llm_model_kwargs={
             "host": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"),
             "options": {"num_ctx": 8192},
diff --git a/examples/unofficial-sample/lightrag_cloudflare_demo.py b/examples/unofficial-sample/lightrag_cloudflare_demo.py
index b5dbabd1..b53e6714 100644
--- a/examples/unofficial-sample/lightrag_cloudflare_demo.py
+++ b/examples/unofficial-sample/lightrag_cloudflare_demo.py
@@ -211,7 +211,7 @@ async def initialize_rag():
         max_parallel_insert=2,
         llm_model_func=cloudflare_worker.query,
         llm_model_name=os.getenv("LLM_MODEL", LLM_MODEL),
-        llm_model_max_token_size=4080,
+        summary_max_tokens=4080,
         embedding_func=EmbeddingFunc(
             embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
             max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "2048")),
diff --git a/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py b/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py
index 38d89218..00845796 100644
--- a/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py
+++ b/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py
@@ -56,7 +56,7 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        llm_model_max_token_size=32768,
+        summary_max_tokens=10000,
         embedding_func=embedding_func,
         chunk_token_size=512,
         chunk_overlap_token_size=256,
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 0294657b..e56b1749 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -22,7 +22,7 @@ from lightrag.constants import (
     DEFAULT_MIN_RERANK_SCORE,
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
     DEFAULT_MAX_ASYNC,
-    DEFAULT_MAX_TOKENS,
+    DEFAULT_SUMMARY_MAX_TOKENS,
     DEFAULT_SUMMARY_LANGUAGE,
     DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
     DEFAULT_EMBEDDING_BATCH_NUM,
@@ -118,13 +118,13 @@ def parse_args() -> argparse.Namespace:
         "--max-async",
         type=int,
         default=get_env_value("MAX_ASYNC", DEFAULT_MAX_ASYNC, int),
-        help="Maximum async operations (default: from env or 4)",
+        help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
     )
     parser.add_argument(
         "--max-tokens",
         type=int,
-        default=get_env_value("MAX_TOKENS", DEFAULT_MAX_TOKENS, int),
-        help="Maximum token size (default: from env or 32000)",
+        default=get_env_value("MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
+        help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
     )

     # Logging configuration
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 2d05ac97..2ae2d87b 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -347,7 +347,7 @@ def create_app(args):
             else openai_alike_model_complete,
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            llm_model_max_token_size=args.max_tokens,
+            summary_max_tokens=args.max_tokens,
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs={
@@ -386,7 +386,7 @@ def create_app(args):
             },
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
-            llm_model_max_token_size=args.max_tokens,
+            summary_max_tokens=args.max_tokens,
             embedding_func=embedding_func,
             kv_storage=args.kv_storage,
             graph_storage=args.graph_storage,
diff --git a/lightrag/constants.py b/lightrag/constants.py
index b02bcd6e..7fc47e21 100644
--- a/lightrag/constants.py
+++ b/lightrag/constants.py
@@ -14,7 +14,7 @@ DEFAULT_TIMEOUT = 150
 DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for summaries
 DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
 DEFAULT_MAX_GLEANING = 1
-DEFAULT_MAX_TOKENS = 10000  # Default maximum token size
+DEFAULT_SUMMARY_MAX_TOKENS = 10000  # Default maximum token size

 # Separator for graph fields
 GRAPH_FIELD_SEP = "<SEP>"
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 9df35175..c930dc66 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -32,6 +32,7 @@ from lightrag.constants import (
     DEFAULT_COSINE_THRESHOLD,
     DEFAULT_RELATED_CHUNK_NUMBER,
     DEFAULT_MIN_RERANK_SCORE,
+    DEFAULT_SUMMARY_MAX_TOKENS,
 )

 from lightrag.utils import get_env_value
@@ -270,7 +271,9 @@ class LightRAG:
     llm_model_name: str = field(default="gpt-4o-mini")
     """Name of the LLM model used for generating responses."""

-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))
+    summary_max_tokens: int = field(
+        default=int(os.getenv("MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
+    )
     """Maximum number of tokens allowed per LLM response."""

     llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 769a9f1e..78836176 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -125,7 +125,7 @@ async def _handle_entity_relation_summary(
     use_llm_func = partial(use_llm_func, _priority=8)

     tokenizer: Tokenizer = global_config["tokenizer"]
-    llm_max_tokens = global_config["llm_model_max_token_size"]
+    llm_max_tokens = global_config["summary_max_tokens"]

     language = global_config["addon_params"].get(
         "language", PROMPTS["DEFAULT_LANGUAGE"]
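
For callers updating to the renamed parameter, a minimal initialization sketch (illustrative only; it assumes the gpt_4o_mini_complete / openai_embed helpers and the storage and pipeline-status setup documented in the project README, with OPENAI_API_KEY set in the environment):

import asyncio

from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status


async def initialize_rag():
    rag = LightRAG(
        working_dir="./rag_storage",
        llm_model_func=gpt_4o_mini_complete,
        embedding_func=openai_embed,
        # Renamed from llm_model_max_token_size: caps the tokens sent to the
        # LLM when generating entity/relation summaries; still overridable
        # via the MAX_TOKENS environment variable (defaults to 10000).
        summary_max_tokens=10000,
    )
    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag


if __name__ == "__main__":
    rag = asyncio.run(initialize_rag())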