Update Ollama context length configuration

- Rename OLLAMA_NUM_CTX to OLLAMA_LLM_NUM_CTX
- Increase default context window size
- Add requirement for minimum context size
- Update documentation examples
parent 645f81f7c8
commit 75d1b1e9f8

4 changed files with 9 additions and 8 deletions
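Since this commit renames an environment variable, deployments that still set OLLAMA_NUM_CTX would silently fall back to the default. A minimal sketch of a backward-compatible reader (the two variable names come from this diff; the helper and its fallback behavior are assumptions, not necessarily what this project does):

```python
import os
import warnings

def read_ollama_num_ctx(default: int = 32768) -> int:
    """Read the Ollama context window size from the environment.

    Prefers the new OLLAMA_LLM_NUM_CTX name introduced by this commit,
    falling back to the legacy OLLAMA_NUM_CTX with a deprecation warning.
    """
    value = os.environ.get("OLLAMA_LLM_NUM_CTX")
    if value is None:
        legacy = os.environ.get("OLLAMA_NUM_CTX")
        if legacy is not None:
            warnings.warn("OLLAMA_NUM_CTX is deprecated; use OLLAMA_LLM_NUM_CTX")
            value = legacy
    return int(value) if value is not None else default
```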
```diff
@@ -118,8 +118,9 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
 
-### Set as num_ctx option for Ollama LLM
-# OLLAMA_NUM_CTX=32768
+### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
+### see also env.ollama-binding-options.example for fine tuning ollama
+# OLLAMA_LLM_NUM_CTX=32768
 
 ### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
```
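For context, num_ctx is a standard Ollama generation option, so a value read from OLLAMA_LLM_NUM_CTX is ultimately passed through to the server per request. A rough sketch using the official ollama Python client (the env-var plumbing here is illustrative, not this project's code):

```python
import os
import ollama  # official Ollama Python client: pip install ollama

# Assumed wiring: read the variable this commit introduces, with its documented default.
num_ctx = int(os.environ.get("OLLAMA_LLM_NUM_CTX", "32768"))

response = ollama.chat(
    model="mistral-nemo:latest",
    messages=[{"role": "user", "content": "Hello"}],
    options={"num_ctx": num_ctx},  # context window size, in tokens
)
print(response["message"]["content"])
```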
```diff
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama server context token count (based on your Ollama server capacity)
-OLLAMA_NUM_CTX=8192
+### Ollama server context token count (must be larger than MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=8192
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
```
```diff
@@ -69,8 +69,8 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Ollama Server context length
-OLLAMA_NUM_CTX=8192
+### Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=16384
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
```
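The updated comments encode an invariant: the Ollama context window must exceed MAX_TOTAL_TOKENS by at least 2000 tokens, presumably leaving headroom for prompt scaffolding. A hedged sketch of a startup check enforcing it (both names appear in this diff; the check itself is hypothetical):

```python
import os

def check_ollama_ctx(max_total_tokens: int) -> int:
    """Validate OLLAMA_LLM_NUM_CTX against the MAX_TOTAL_TOKENS budget.

    The env examples in this commit require num_ctx > MAX_TOTAL_TOKENS + 2000.
    """
    num_ctx = int(os.environ.get("OLLAMA_LLM_NUM_CTX", "32768"))
    required = max_total_tokens + 2000
    if num_ctx <= required:
        raise ValueError(
            f"OLLAMA_LLM_NUM_CTX={num_ctx} is too small; "
            f"it must be larger than MAX_TOTAL_TOKENS+2000 ({required})"
        )
    return num_ctx

# e.g. with MAX_TOTAL_TOKENS=12000, any num_ctx above 14000 passes:
check_ollama_ctx(max_total_tokens=12000)
```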
```diff
@@ -240,7 +240,7 @@ class _OllamaOptionsMixin:
     """Options for Ollama bindings."""
 
     # Core context and generation parameters
-    num_ctx: int = 4096  # Context window size (number of tokens)
+    num_ctx: int = 32768  # Context window size (number of tokens)
     num_predict: int = 128  # Maximum number of tokens to predict
     num_keep: int = 0  # Number of tokens to keep from the initial prompt
     seed: int = -1  # Random seed for generation (-1 for random)
```
```diff
@@ -438,7 +438,7 @@ if __name__ == "__main__":
     # test LLM options
     ollama_options = OllamaLLMOptions.options_dict(args)
     print(ollama_options)
-    print(OllamaLLMOptions(num_ctx=32768).asdict())
+    print(OllamaLLMOptions(num_ctx=30000).asdict())
 
     # test embedding options
     embedding_options = OllamaEmbeddingOptions.options_dict(args)
```
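The Python hunks suggest _OllamaOptionsMixin is a dataclass-style container whose fields map one-to-one onto Ollama request options. A minimal sketch of that pattern (the field names, defaults, and the asdict() call are taken from the diff; the rest is an assumption about how the class is built):

```python
from dataclasses import dataclass, asdict

@dataclass
class OllamaLLMOptions:
    """Sketch of an options container mirroring Ollama generation options."""

    num_ctx: int = 32768    # context window size (tokens); new default from this commit
    num_predict: int = 128  # maximum number of tokens to predict
    num_keep: int = 0       # tokens to keep from the initial prompt
    seed: int = -1          # random seed for generation (-1 for random)

    def asdict(self) -> dict:
        # Matches the asdict() call exercised in the __main__ test block above.
        return asdict(self)

print(OllamaLLMOptions(num_ctx=30000).asdict())
```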