Reduce default MAX_TOKENS from 32000 to 10000

yangdx 2025-07-26 08:13:49 +08:00
parent 6a99d7ac28
commit b3c2987006
4 changed files with 3 additions and 14 deletions

View file

@@ -77,14 +77,13 @@ ENABLE_LLM_CACHE=true
 ### Language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English
 ENABLE_LLM_CACHE_FOR_EXTRACT=true
-### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model)
-MAX_TOKENS=32000
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
 ### Entity and relation summarization configuration
-### Number of duplicated entities/edges to trigger LLM re-summary on merge ( at least 3 is recommented)
+### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommented) and max tokens send to LLM
 # FORCE_LLM_SUMMARY_ON_MERGE=4
+# MAX_TOKENS=10000
 ### Maximum number of entity extraction attempts for ambiguous content
 # MAX_GLEANING=1

View file

@@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数(小于模型上下文大小)
-MAX_TOKENS=32000
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数(小于模型上下文大小)
-MAX_TOKENS=7500
 ### Ollama 服务器上下文 token 数(基于您的 Ollama 服务器容量)
 OLLAMA_NUM_CTX=8192
@@ -474,7 +470,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini

View file

@@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM (less than model context size)
-MAX_TOKENS=32768
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM for entity relation description summarization (Less than LLM context length)
-MAX_TOKENS=7500
 ### Ollama Server context length
 OLLAMA_NUM_CTX=8192
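The removed comments in the Ollama example document the constraint that MAX_TOKENS stay below the model context window configured by OLLAMA_NUM_CTX. A quick illustrative check of the example values (not code from the repository):

# Illustrative only: the summary token budget must stay below the Ollama
# context window, per the removed comment above.
max_tokens = 7500        # MAX_TOKENS value removed from the Ollama example
ollama_num_ctx = 8192    # OLLAMA_NUM_CTX kept in the example
assert max_tokens < ollama_num_ctx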
@@ -478,7 +474,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini

View file

@@ -269,7 +269,7 @@ class LightRAG:
     llm_model_name: str = field(default="gpt-4o-mini")
     """Name of the LLM model used for generating responses."""
-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32000)))
+    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))
     """Maximum number of tokens allowed per LLM response."""
     llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
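Only the fallback changes in this hunk; a deployment that relied on the old 32000-token budget can still pin it explicitly. A minimal sketch, assuming (as the field(default=int(os.getenv(...))) pattern implies) that the default is computed when the lightrag module is imported:

import os

# Keep the previous 32000-token budget despite the new 10000 default.
# Because the dataclass field default is evaluated at import time, set the
# variable before importing lightrag (or put it in the .env file loaded at
# startup).
os.environ["MAX_TOKENS"] = "32000"

default_budget = int(os.getenv("MAX_TOKENS", 10000))
assert default_budget == 32000

Passing llm_model_max_token_size=32000 when constructing LightRAG has the same effect, since it is an ordinary dataclass field and therefore an __init__ parameter.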