From 75d1b1e9f852fbb3fa65a3b3ed6a1b155905a5e1 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Tue, 29 Jul 2025 09:53:37 +0800
Subject: [PATCH] Update Ollama context length configuration

- Rename OLLAMA_NUM_CTX to OLLAMA_LLM_NUM_CTX
- Increase default context window size
- Add requirement for minimum context size
- Update documentation examples
---
 env.example                     | 5 +++--
 lightrag/api/README-zh.md       | 4 ++--
 lightrag/api/README.md          | 4 ++--
 lightrag/llm/binding_options.py | 4 ++--
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/env.example b/env.example
index ab12cc41..79cd1a13 100644
--- a/env.example
+++ b/env.example
@@ -118,8 +118,9 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
 
-### Set as num_ctx option for Ollama LLM
-# OLLAMA_NUM_CTX=32768
+### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
+### see also env.ollama-binding-options.example for fine tuning ollama
+# OLLAMA_LLM_NUM_CTX=32768
 
 ### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
diff --git a/lightrag/api/README-zh.md b/lightrag/api/README-zh.md
index 95a7d660..b9b8245d 100644
--- a/lightrag/api/README-zh.md
+++ b/lightrag/api/README-zh.md
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama 服务器上下文 token 数(基于您的 Ollama 服务器容量)
-OLLAMA_NUM_CTX=8192
+### Ollama 服务器上下文 token 数(必须大于 MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=8192
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 25e721b2..84a8eb4a 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama Server context length
-OLLAMA_NUM_CTX=8192
+### Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=8192
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
diff --git a/lightrag/llm/binding_options.py b/lightrag/llm/binding_options.py
index 6c0b169c..c4e873ea 100644
--- a/lightrag/llm/binding_options.py
+++ b/lightrag/llm/binding_options.py
@@ -240,7 +240,7 @@ class _OllamaOptionsMixin:
     """Options for Ollama bindings."""
 
     # Core context and generation parameters
-    num_ctx: int = 4096  # Context window size (number of tokens)
+    num_ctx: int = 32768  # Context window size (number of tokens)
     num_predict: int = 128  # Maximum number of tokens to predict
     num_keep: int = 0  # Number of tokens to keep from the initial prompt
     seed: int = -1  # Random seed for generation (-1 for random)
@@ -438,7 +438,7 @@ if __name__ == "__main__":
     # test LLM options
     ollama_options = OllamaLLMOptions.options_dict(args)
     print(ollama_options)
-    print(OllamaLLMOptions(num_ctx=32768).asdict())
+    print(OllamaLLMOptions(num_ctx=30000).asdict())
 
     # test embedding options
     embedding_options = OllamaEmbeddingOptions.options_dict(args)