diff --git a/env.example b/env.example
index 9b65cd7a..6eb32538 100644
--- a/env.example
+++ b/env.example
@@ -77,14 +77,13 @@ ENABLE_LLM_CACHE=true
 ### Language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English
 ENABLE_LLM_CACHE_FOR_EXTRACT=true
-### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model)
-MAX_TOKENS=32000
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
 
 ### Entity and relation summarization configuration
-### Number of duplicated entities/edges to trigger LLM re-summary on merge ( at least 3 is recommented)
+### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended), and max tokens sent to LLM
 # FORCE_LLM_SUMMARY_ON_MERGE=4
+# MAX_TOKENS=10000
 ### Maximum number of entity extraction attempts for ambiguous content
 # MAX_GLEANING=1
diff --git a/lightrag/api/README-zh.md b/lightrag/api/README-zh.md
index e2a12446..f30a3f0e 100644
--- a/lightrag/api/README-zh.md
+++ b/lightrag/api/README-zh.md
@@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数(小于模型上下文大小)
-MAX_TOKENS=32000
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数(小于模型上下文大小)
-MAX_TOKENS=7500
 
 ### Ollama 服务器上下文 token 数(基于您的 Ollama 服务器容量)
 OLLAMA_NUM_CTX=8192
@@ -474,7 +470,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768
 
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini
diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 2cbb68a6..9268a1e1 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM (less than model context size)
-MAX_TOKENS=32768
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM for entity relation description summarization (Less than LLM context length)
-MAX_TOKENS=7500
 
 ### Ollama Server context length
 OLLAMA_NUM_CTX=8192
@@ -478,7 +474,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768
 
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index b6cf53bb..8bd6857d 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -269,7 +269,7 @@ class LightRAG:
     llm_model_name: str = field(default="gpt-4o-mini")
     """Name of the LLM model used for generating responses."""
 
-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32000)))
+    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))
    """Maximum number of tokens allowed per LLM response."""
 
     llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
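
A minimal sketch (not part of the patch) of how the changed default behaves, using a hypothetical stand-in class for the LightRAG dataclass; SummaryConfig and the surrounding script are illustrative only, while the field expression is copied verbatim from the patch:

import os
from dataclasses import dataclass, field

os.environ.pop("MAX_TOKENS", None)  # simulate an unset MAX_TOKENS

@dataclass
class SummaryConfig:
    # Mirrors the patched field: reads MAX_TOKENS at class-definition time,
    # falling back to the new default of 10000 when the variable is unset.
    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))

print(SummaryConfig().llm_model_max_token_size)  # -> 10000

Because os.getenv runs while the class body is evaluated, MAX_TOKENS must be exported before the module defining the dataclass is imported; setting it afterwards does not change the default.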