Update Ollama context length configuration

- Rename OLLAMA_NUM_CTX to OLLAMA_LLM_NUM_CTX
- Increase default context window size
- Add requirement for minimum context size
- Update documentation examples
This commit is contained in:
yangdx 2025-07-29 09:53:37 +08:00
parent 645f81f7c8
commit 75d1b1e9f8
4 changed files with 9 additions and 8 deletions

View file

@@ -118,8 +118,9 @@ LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
### Set as num_ctx option for Ollama LLM
# OLLAMA_NUM_CTX=32768
### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
### see also env.ollama-binding-options.example for fine tuning ollama
# OLLAMA_LLM_NUM_CTX=32768
### Optional for Azure
# AZURE_OPENAI_API_VERSION=2024-08-01-preview

View file

@@ -69,8 +69,8 @@ LLM_BINDING=ollama
LLM_MODEL=mistral-nemo:latest
LLM_BINDING_HOST=http://localhost:11434
# LLM_BINDING_API_KEY=your_api_key
### Ollama 服务器上下文 token 数(基于您的 Ollama 服务器容量)
OLLAMA_NUM_CTX=8192
### Ollama 服务器上下文 token 数(必须大于 MAX_TOTAL_TOKENS+2000)
OLLAMA_LLM_NUM_CTX=8192
EMBEDDING_BINDING=ollama
EMBEDDING_BINDING_HOST=http://localhost:11434

View file

@@ -69,8 +69,8 @@ LLM_BINDING=ollama
LLM_MODEL=mistral-nemo:latest
LLM_BINDING_HOST=http://localhost:11434
# LLM_BINDING_API_KEY=your_api_key
### Ollama Server context length
OLLAMA_NUM_CTX=8192
### Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000)
OLLAMA_LLM_NUM_CTX=16384
EMBEDDING_BINDING=ollama
EMBEDDING_BINDING_HOST=http://localhost:11434

View file

@@ -240,7 +240,7 @@ class _OllamaOptionsMixin:
"""Options for Ollama bindings."""
# Core context and generation parameters
num_ctx: int = 4096 # Context window size (number of tokens)
num_ctx: int = 32768 # Context window size (number of tokens)
num_predict: int = 128 # Maximum number of tokens to predict
num_keep: int = 0 # Number of tokens to keep from the initial prompt
seed: int = -1 # Random seed for generation (-1 for random)
@@ -438,7 +438,7 @@ if __name__ == "__main__":
# test LLM options
ollama_options = OllamaLLMOptions.options_dict(args)
print(ollama_options)
print(OllamaLLMOptions(num_ctx=32768).asdict())
print(OllamaLLMOptions(num_ctx=30000).asdict())
# test embedding options
embedding_options = OllamaEmbeddingOptions.options_dict(args)