Update Ollama context length configuration

- Rename OLLAMA_NUM_CTX to OLLAMA_LLM_NUM_CTX
- Increase default context window size
- Add requirement for minimum context size
- Update documentation examples
This commit is contained in:
yangdx 2025-07-29 09:53:37 +08:00
parent 645f81f7c8
commit 75d1b1e9f8
4 changed files with 9 additions and 8 deletions

View file

@@ -118,8 +118,9 @@ LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
### Set as num_ctx option for Ollama LLM
# OLLAMA_NUM_CTX=32768
### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
### see also env.ollama-binding-options.example for fine tuning ollama
# OLLAMA_LLM_NUM_CTX=32768
### Optional for Azure
# AZURE_OPENAI_API_VERSION=2024-08-01-preview

View file

@@ -69,8 +69,8 @@ LLM_BINDING=ollama
LLM_MODEL=mistral-nemo:latest
LLM_BINDING_HOST=http://localhost:11434
# LLM_BINDING_API_KEY=your_api_key
### Ollama 服务器上下文 token 数(基于您的 Ollama 服务器容量)
OLLAMA_NUM_CTX=8192
### Ollama 服务器上下文 token 数(必须大于 MAX_TOTAL_TOKENS+2000)
OLLAMA_LLM_NUM_CTX=8192
EMBEDDING_BINDING=ollama
EMBEDDING_BINDING_HOST=http://localhost:11434

View file

@@ -69,8 +69,8 @@ LLM_BINDING=ollama
LLM_MODEL=mistral-nemo:latest
LLM_BINDING_HOST=http://localhost:11434
# LLM_BINDING_API_KEY=your_api_key
### Ollama Server context length
OLLAMA_NUM_CTX=8192
### Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000)
OLLAMA_LLM_NUM_CTX=16384
EMBEDDING_BINDING=ollama
EMBEDDING_BINDING_HOST=http://localhost:11434

View file

@@ -240,7 +240,7 @@ class _OllamaOptionsMixin:
"""Options for Ollama bindings."""
# Core context and generation parameters
num_ctx: int = 4096 # Context window size (number of tokens)
num_ctx: int = 32768 # Context window size (number of tokens)
num_predict: int = 128 # Maximum number of tokens to predict
num_keep: int = 0 # Number of tokens to keep from the initial prompt
seed: int = -1 # Random seed for generation (-1 for random)
@@ -438,7 +438,7 @@ if __name__ == "__main__":
# test LLM options
ollama_options = OllamaLLMOptions.options_dict(args)
print(ollama_options)
print(OllamaLLMOptions(num_ctx=32768).asdict())
print(OllamaLLMOptions(num_ctx=30000).asdict())
# test embedding options
embedding_options = OllamaEmbeddingOptions.options_dict(args)