Update Ollama context length configuration
- Rename OLLAMA_NUM_CTX to OLLAMA_LLM_NUM_CTX
- Increase default context window size
- Add requirement for minimum context size (must exceed MAX_TOTAL_TOKENS + 2000)
- Update documentation examples
This commit is contained in:
parent
645f81f7c8
commit
75d1b1e9f8
4 changed files with 9 additions and 8 deletions
|
|
@ -118,8 +118,9 @@ LLM_MODEL=gpt-4o
|
|||
LLM_BINDING_HOST=https://api.openai.com/v1
|
||||
LLM_BINDING_API_KEY=your_api_key
|
||||
|
||||
### Set as num_ctx option for Ollama LLM
|
||||
# OLLAMA_NUM_CTX=32768
|
||||
### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
|
||||
### see also env.ollama-binding-options.example for fine tuning ollama
|
||||
# OLLAMA_LLM_NUM_CTX=32768
|
||||
|
||||
### Optional for Azure
|
||||
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
|
||||
|
|
|
|||
|
|
@ -69,8 +69,8 @@ LLM_BINDING=ollama
|
|||
LLM_MODEL=mistral-nemo:latest
|
||||
LLM_BINDING_HOST=http://localhost:11434
|
||||
# LLM_BINDING_API_KEY=your_api_key
|
||||
### Ollama 服务器上下文 token 数(基于您的 Ollama 服务器容量)
|
||||
OLLAMA_NUM_CTX=8192
|
||||
### Ollama 服务器上下文 token 数(必须大于 MAX_TOTAL_TOKENS+2000)
|
||||
OLLAMA_LLM_NUM_CTX=8192
|
||||
|
||||
EMBEDDING_BINDING=ollama
|
||||
EMBEDDING_BINDING_HOST=http://localhost:11434
|
||||
|
|
|
|||
|
|
@ -69,8 +69,8 @@ LLM_BINDING=ollama
|
|||
LLM_MODEL=mistral-nemo:latest
|
||||
LLM_BINDING_HOST=http://localhost:11434
|
||||
# LLM_BINDING_API_KEY=your_api_key
|
||||
### Ollama Server context length
|
||||
OLLAMA_NUM_CTX=8192
|
||||
### Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000)
|
||||
OLLAMA_LLM_NUM_CTX=16384
|
||||
|
||||
EMBEDDING_BINDING=ollama
|
||||
EMBEDDING_BINDING_HOST=http://localhost:11434
|
||||
|
|
|
|||
|
|
@ -240,7 +240,7 @@ class _OllamaOptionsMixin:
|
|||
"""Options for Ollama bindings."""
|
||||
|
||||
# Core context and generation parameters
|
||||
num_ctx: int = 4096 # Context window size (number of tokens)
|
||||
num_ctx: int = 32768 # Context window size (number of tokens)
|
||||
num_predict: int = 128 # Maximum number of tokens to predict
|
||||
num_keep: int = 0 # Number of tokens to keep from the initial prompt
|
||||
seed: int = -1 # Random seed for generation (-1 for random)
|
||||
|
|
@ -438,7 +438,7 @@ if __name__ == "__main__":
|
|||
# test LLM options
|
||||
ollama_options = OllamaLLMOptions.options_dict(args)
|
||||
print(ollama_options)
|
||||
print(OllamaLLMOptions(num_ctx=32768).asdict())
|
||||
print(OllamaLLMOptions(num_ctx=30000).asdict())
|
||||
|
||||
# test embedding options
|
||||
embedding_options = OllamaEmbeddingOptions.options_dict(args)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue