Reduce default MAX_TOKENS from 32000 to 10000

yangdx 2025-07-26 08:13:49 +08:00
parent 6a99d7ac28
commit b3c2987006
4 changed files with 3 additions and 14 deletions

View file

@@ -77,14 +77,13 @@ ENABLE_LLM_CACHE=true
 ### Language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English
 ENABLE_LLM_CACHE_FOR_EXTRACT=true
-### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model)
-MAX_TOKENS=32000
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
 ### Entity and relation summarization configuration
-### Number of duplicated entities/edges to trigger LLM re-summary on merge ( at least 3 is recommented)
+### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommented) and max tokens send to LLM
 # FORCE_LLM_SUMMARY_ON_MERGE=4
+# MAX_TOKENS=10000
 ### Maximum number of entity extraction attempts for ambiguous content
 # MAX_GLEANING=1

View file

@@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数(小于模型上下文大小)
-MAX_TOKENS=32000
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数(小于模型上下文大小)
-MAX_TOKENS=7500
 ### Ollama 服务器上下文 token 数(基于您的 Ollama 服务器容量)
 OLLAMA_NUM_CTX=8192
@@ -474,7 +470,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini

View file

@@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM (less than model context size)
-MAX_TOKENS=32768
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM for entity relation description summarization (Less than LLM context length)
-MAX_TOKENS=7500
 ### Ollama Server context length
 OLLAMA_NUM_CTX=8192
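The removed comments in the Ollama example document the constraint that MAX_TOKENS stay below the model context window configured by OLLAMA_NUM_CTX. A quick illustrative check of the example values (not code from the repository):

# Illustrative only: the summary token budget must stay below the Ollama
# context window, per the removed comment above.
max_tokens = 7500        # MAX_TOKENS value removed from the Ollama example
ollama_num_ctx = 8192    # OLLAMA_NUM_CTX kept in the example
assert max_tokens < ollama_num_ctx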
@@ -478,7 +474,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini

View file

@@ -269,7 +269,7 @@ class LightRAG:
     llm_model_name: str = field(default="gpt-4o-mini")
     """Name of the LLM model used for generating responses."""
-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32000)))
+    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))
     """Maximum number of tokens allowed per LLM response."""
     llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
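Only the fallback changes in this hunk; a deployment that relied on the old 32000-token budget can still pin it explicitly. A minimal sketch, assuming (as the field(default=int(os.getenv(...))) pattern implies) that the default is computed when the lightrag module is imported:

import os

# Keep the previous 32000-token budget despite the new 10000 default.
# Because the dataclass field default is evaluated at import time, set the
# variable before importing lightrag (or put it in the .env file loaded at
# startup).
os.environ["MAX_TOKENS"] = "32000"

default_budget = int(os.getenv("MAX_TOKENS", 10000))
assert default_budget == 32000

Passing llm_model_max_token_size=32000 when constructing LightRAG has the same effect, since it is an ordinary dataclass field and therefore an __init__ parameter.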