diff --git a/env.example b/env.example
index 9b65cd7a..6eb32538 100644
--- a/env.example
+++ b/env.example
@@ -77,14 +77,13 @@ ENABLE_LLM_CACHE=true
 ### Language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English
 ENABLE_LLM_CACHE_FOR_EXTRACT=true
-### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model)
-MAX_TOKENS=32000
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
 
 ### Entity and relation summarization configuration
-### Number of duplicated entities/edges to trigger LLM re-summary on merge ( at least 3 is recommented)
+### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended), and max tokens sent to LLM
 # FORCE_LLM_SUMMARY_ON_MERGE=4
+# MAX_TOKENS=10000
 ### Maximum number of entity extraction attempts for ambiguous content
 # MAX_GLEANING=1
diff --git a/lightrag/api/README-zh.md b/lightrag/api/README-zh.md
index e2a12446..f30a3f0e 100644
--- a/lightrag/api/README-zh.md
+++ b/lightrag/api/README-zh.md
@@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数(小于模型上下文大小)
-MAX_TOKENS=32000
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### 发送给 LLM 进行实体关系摘要的最大 token 数(小于模型上下文大小)
-MAX_TOKENS=7500
 
 ### Ollama 服务器上下文 token 数(基于您的 Ollama 服务器容量)
 OLLAMA_NUM_CTX=8192
@@ -474,7 +470,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768
 
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini
diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 2cbb68a6..9268a1e1 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -54,8 +54,6 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM (less than model context size)
-MAX_TOKENS=32768
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
@@ -71,8 +69,6 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM for entity relation description summarization (Less than LLM context length)
-MAX_TOKENS=7500
 
 ### Ollama Server context length
 OLLAMA_NUM_CTX=8192
@@ -478,7 +474,6 @@ MAX_PARALLEL_INSERT=2
 TIMEOUT=200
 TEMPERATURE=0.0
 MAX_ASYNC=4
-MAX_TOKENS=32768
 
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o-mini
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index b6cf53bb..8bd6857d 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -269,7 +269,7 @@ class LightRAG:
     llm_model_name: str = field(default="gpt-4o-mini")
     """Name of the LLM model used for generating responses."""
 
-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32000)))
+    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))
    """Maximum number of tokens allowed per LLM response."""
 
     llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
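
A minimal sketch (not part of the patch) of how the changed default behaves, using a hypothetical stand-in class for the LightRAG dataclass; SummaryConfig and the surrounding script are illustrative only, while the field expression is copied verbatim from the patch:

import os
from dataclasses import dataclass, field

os.environ.pop("MAX_TOKENS", None)  # simulate an unset MAX_TOKENS

@dataclass
class SummaryConfig:
    # Mirrors the patched field: reads MAX_TOKENS at class-definition time,
    # falling back to the new default of 10000 when the variable is unset.
    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))

print(SummaryConfig().llm_model_max_token_size)  # -> 10000

Because os.getenv runs while the class body is evaluated, MAX_TOKENS must be exported before the module defining the dataclass is imported; setting it afterwards does not change the default.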