################################################################################
# Autogenerated .env entries list for LightRAG binding options
#
# To generate, run:
# $ python -m lightrag.llm.binding_options
################################################################################
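# Illustrative usage (values here are hypothetical, not the defaults listed
# below): to override an option, uncomment its entry and edit the value.
# For example, to enlarge the LLM context window and make sampling more
# deterministic, one might set:
#
# OLLAMA_LLM_NUM_CTX=8192
# OLLAMA_LLM_TEMPERATURE=0.2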
# ollama_embedding -- Context window size (number of tokens)
# OLLAMA_EMBEDDING_NUM_CTX=4096

# ollama_embedding -- Maximum number of tokens to predict
# OLLAMA_EMBEDDING_NUM_PREDICT=128

# ollama_embedding -- Number of tokens to keep from the initial prompt
# OLLAMA_EMBEDDING_NUM_KEEP=0

# ollama_embedding -- Random seed for generation (-1 for random)
# OLLAMA_EMBEDDING_SEED=-1

# ollama_embedding -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_EMBEDDING_TEMPERATURE=0.8

# ollama_embedding -- Top-k sampling parameter (0 = disabled)
# OLLAMA_EMBEDDING_TOP_K=40

# ollama_embedding -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_EMBEDDING_TOP_P=0.9

# ollama_embedding -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_EMBEDDING_TFS_Z=1.0

# ollama_embedding -- Typical probability mass (1.0 = disabled)
# OLLAMA_EMBEDDING_TYPICAL_P=1.0

# ollama_embedding -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_EMBEDDING_MIN_P=0.0

# ollama_embedding -- Number of tokens to consider for repetition penalty
# OLLAMA_EMBEDDING_REPEAT_LAST_N=64

# ollama_embedding -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_EMBEDDING_REPEAT_PENALTY=1.1

# ollama_embedding -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_EMBEDDING_PRESENCE_PENALTY=0.0

# ollama_embedding -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_EMBEDDING_FREQUENCY_PENALTY=0.0

# ollama_embedding -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_EMBEDDING_MIROSTAT=0

# ollama_embedding -- Mirostat target entropy
# OLLAMA_EMBEDDING_MIROSTAT_TAU=5.0

# ollama_embedding -- Mirostat learning rate
# OLLAMA_EMBEDDING_MIROSTAT_ETA=0.1

# ollama_embedding -- Enable NUMA optimization
# OLLAMA_EMBEDDING_NUMA=False

# ollama_embedding -- Batch size for processing
# OLLAMA_EMBEDDING_NUM_BATCH=512

# ollama_embedding -- Number of GPUs to use (-1 for auto)
# OLLAMA_EMBEDDING_NUM_GPU=-1

# ollama_embedding -- Main GPU index
# OLLAMA_EMBEDDING_MAIN_GPU=0

# ollama_embedding -- Optimize for low VRAM
# OLLAMA_EMBEDDING_LOW_VRAM=False

# ollama_embedding -- Number of CPU threads (0 for auto)
# OLLAMA_EMBEDDING_NUM_THREAD=0

# ollama_embedding -- Use half-precision for key/value cache
# OLLAMA_EMBEDDING_F16_KV=True

# ollama_embedding -- Return logits for all tokens
# OLLAMA_EMBEDDING_LOGITS_ALL=False

# ollama_embedding -- Only load vocabulary
# OLLAMA_EMBEDDING_VOCAB_ONLY=False

# ollama_embedding -- Use memory mapping for model files
# OLLAMA_EMBEDDING_USE_MMAP=True

# ollama_embedding -- Lock model in memory
# OLLAMA_EMBEDDING_USE_MLOCK=False

# ollama_embedding -- Only use for embeddings
# OLLAMA_EMBEDDING_EMBEDDING_ONLY=False

# ollama_embedding -- Penalize newline tokens
# OLLAMA_EMBEDDING_PENALIZE_NEWLINE=True

# ollama_embedding -- Stop sequences (comma-separated string)
# OLLAMA_EMBEDDING_STOP=

# ollama_llm -- Context window size (number of tokens)
# OLLAMA_LLM_NUM_CTX=4096

# ollama_llm -- Maximum number of tokens to predict
# OLLAMA_LLM_NUM_PREDICT=128

# ollama_llm -- Number of tokens to keep from the initial prompt
# OLLAMA_LLM_NUM_KEEP=0

# ollama_llm -- Random seed for generation (-1 for random)
# OLLAMA_LLM_SEED=-1

# ollama_llm -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_LLM_TEMPERATURE=0.8

# ollama_llm -- Top-k sampling parameter (0 = disabled)
# OLLAMA_LLM_TOP_K=40

# ollama_llm -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_LLM_TOP_P=0.9

# ollama_llm -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_LLM_TFS_Z=1.0

# ollama_llm -- Typical probability mass (1.0 = disabled)
# OLLAMA_LLM_TYPICAL_P=1.0

# ollama_llm -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_LLM_MIN_P=0.0

# ollama_llm -- Number of tokens to consider for repetition penalty
# OLLAMA_LLM_REPEAT_LAST_N=64

# ollama_llm -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_LLM_REPEAT_PENALTY=1.1

# ollama_llm -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_LLM_PRESENCE_PENALTY=0.0

# ollama_llm -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_LLM_FREQUENCY_PENALTY=0.0

# ollama_llm -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_LLM_MIROSTAT=0

# ollama_llm -- Mirostat target entropy
# OLLAMA_LLM_MIROSTAT_TAU=5.0

# ollama_llm -- Mirostat learning rate
# OLLAMA_LLM_MIROSTAT_ETA=0.1

# ollama_llm -- Enable NUMA optimization
# OLLAMA_LLM_NUMA=False

# ollama_llm -- Batch size for processing
# OLLAMA_LLM_NUM_BATCH=512

# ollama_llm -- Number of GPUs to use (-1 for auto)
# OLLAMA_LLM_NUM_GPU=-1

# ollama_llm -- Main GPU index
# OLLAMA_LLM_MAIN_GPU=0

# ollama_llm -- Optimize for low VRAM
# OLLAMA_LLM_LOW_VRAM=False

# ollama_llm -- Number of CPU threads (0 for auto)
# OLLAMA_LLM_NUM_THREAD=0

# ollama_llm -- Use half-precision for key/value cache
# OLLAMA_LLM_F16_KV=True

# ollama_llm -- Return logits for all tokens
# OLLAMA_LLM_LOGITS_ALL=False

# ollama_llm -- Only load vocabulary
# OLLAMA_LLM_VOCAB_ONLY=False

# ollama_llm -- Use memory mapping for model files
# OLLAMA_LLM_USE_MMAP=True

# ollama_llm -- Lock model in memory
# OLLAMA_LLM_USE_MLOCK=False

# ollama_llm -- Only use for embeddings
# OLLAMA_LLM_EMBEDDING_ONLY=False

# ollama_llm -- Penalize newline tokens
# OLLAMA_LLM_PENALIZE_NEWLINE=True

# ollama_llm -- Stop sequences (comma-separated string)
# OLLAMA_LLM_STOP=
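# Illustrative example (hypothetical sequences, not generated defaults): the
# stop option takes a comma-separated string, so stopping generation at an
# end-of-sequence tag or a "User:" turn marker might look like:
#
# OLLAMA_LLM_STOP=</s>,User: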
#
# End of .env entries for LightRAG binding options
################################################################################