Merge branch 'HKUDS:main' into separator_file_path

2025-07-26 08:17:35 +08:00 · 2025-07-26 08:17:35 +08:00 · 912fc0fc31
commit 912fc0fc31
parent a49b7758e1 b3c2987006
4 changed files with 3 additions and 14 deletions
--- a/env.example
+++ b/env.example
@ -77,14 +77,13 @@ ENABLE_LLM_CACHE=true
 ### Language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English
 ENABLE_LLM_CACHE_FOR_EXTRACT=true
-### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model)
-MAX_TOKENS=32000
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
 ### Entity and relation summarization configuration
-### Number of duplicated entities/edges to trigger LLM re-summary on merge ( at least 3 is recommented)
+### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended), and max tokens sent to LLM
 # FORCE_LLM_SUMMARY_ON_MERGE=4
+# MAX_TOKENS=10000
 ### Maximum number of entity extraction attempts for ambiguous content
 # MAX_GLEANING=1

--- a/lightrag/api/README-zh.md
+++ b/lightrag/api/README-zh.md
@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数（小于模型上下文大小）
-MAX_TOKENS=32000

 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数（小于模型上下文大小）
-MAX_TOKENS=7500
 ###  Ollama 服务器上下文 token 数（基于您的 Ollama 服务器容量）
 OLLAMA_NUM_CTX=8192

@ -474,7 +470,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768

 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM (less than model context size)
-MAX_TOKENS=32768

 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM for entity relation description summarization (Less than LLM context length)
-MAX_TOKENS=7500
 ###  Ollama Server context length
 OLLAMA_NUM_CTX=8192

@ -478,7 +474,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768

 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@ -269,7 +269,7 @@ class LightRAG:
    llm_model_name: str = field(default="gpt-4o-mini")
    """Name of the LLM model used for generating responses."""

-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32000)))
+    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))
    """Maximum number of tokens allowed per LLM response."""

    llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))