################################################################################
# Autogenerated .env entries list for LightRAG binding options
#
# To generate, run:
# $ python -m lightrag.llm.binding_options
################################################################################
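# Illustrative usage (values here are hypothetical, not the defaults listed
# below): to override an option, uncomment its entry and edit the value.
# For example, to enlarge the LLM context window and make sampling more
# deterministic, one might set:
#
# OLLAMA_LLM_NUM_CTX=8192
# OLLAMA_LLM_TEMPERATURE=0.2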
# ollama_embedding -- Context window size (number of tokens)
# OLLAMA_EMBEDDING_NUM_CTX=4096

# ollama_embedding -- Maximum number of tokens to predict
# OLLAMA_EMBEDDING_NUM_PREDICT=128

# ollama_embedding -- Number of tokens to keep from the initial prompt
# OLLAMA_EMBEDDING_NUM_KEEP=0

# ollama_embedding -- Random seed for generation (-1 for random)
# OLLAMA_EMBEDDING_SEED=-1

# ollama_embedding -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_EMBEDDING_TEMPERATURE=0.8

# ollama_embedding -- Top-k sampling parameter (0 = disabled)
# OLLAMA_EMBEDDING_TOP_K=40

# ollama_embedding -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_EMBEDDING_TOP_P=0.9

# ollama_embedding -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_EMBEDDING_TFS_Z=1.0

# ollama_embedding -- Typical probability mass (1.0 = disabled)
# OLLAMA_EMBEDDING_TYPICAL_P=1.0

# ollama_embedding -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_EMBEDDING_MIN_P=0.0

# ollama_embedding -- Number of tokens to consider for repetition penalty
# OLLAMA_EMBEDDING_REPEAT_LAST_N=64

# ollama_embedding -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_EMBEDDING_REPEAT_PENALTY=1.1

# ollama_embedding -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_EMBEDDING_PRESENCE_PENALTY=0.0

# ollama_embedding -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_EMBEDDING_FREQUENCY_PENALTY=0.0

# ollama_embedding -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_EMBEDDING_MIROSTAT=0

# ollama_embedding -- Mirostat target entropy
# OLLAMA_EMBEDDING_MIROSTAT_TAU=5.0

# ollama_embedding -- Mirostat learning rate
# OLLAMA_EMBEDDING_MIROSTAT_ETA=0.1

# ollama_embedding -- Enable NUMA optimization
# OLLAMA_EMBEDDING_NUMA=False

# ollama_embedding -- Batch size for processing
# OLLAMA_EMBEDDING_NUM_BATCH=512

# ollama_embedding -- Number of GPUs to use (-1 for auto)
# OLLAMA_EMBEDDING_NUM_GPU=-1

# ollama_embedding -- Main GPU index
# OLLAMA_EMBEDDING_MAIN_GPU=0

# ollama_embedding -- Optimize for low VRAM
# OLLAMA_EMBEDDING_LOW_VRAM=False

# ollama_embedding -- Number of CPU threads (0 for auto)
# OLLAMA_EMBEDDING_NUM_THREAD=0

# ollama_embedding -- Use half-precision for key/value cache
# OLLAMA_EMBEDDING_F16_KV=True

# ollama_embedding -- Return logits for all tokens
# OLLAMA_EMBEDDING_LOGITS_ALL=False

# ollama_embedding -- Only load vocabulary
# OLLAMA_EMBEDDING_VOCAB_ONLY=False

# ollama_embedding -- Use memory mapping for model files
# OLLAMA_EMBEDDING_USE_MMAP=True

# ollama_embedding -- Lock model in memory
# OLLAMA_EMBEDDING_USE_MLOCK=False

# ollama_embedding -- Only use for embeddings
# OLLAMA_EMBEDDING_EMBEDDING_ONLY=False

# ollama_embedding -- Penalize newline tokens
# OLLAMA_EMBEDDING_PENALIZE_NEWLINE=True

# ollama_embedding -- Stop sequences (comma-separated string)
# OLLAMA_EMBEDDING_STOP=

# ollama_llm -- Context window size (number of tokens)
# OLLAMA_LLM_NUM_CTX=4096

# ollama_llm -- Maximum number of tokens to predict
# OLLAMA_LLM_NUM_PREDICT=128

# ollama_llm -- Number of tokens to keep from the initial prompt
# OLLAMA_LLM_NUM_KEEP=0

# ollama_llm -- Random seed for generation (-1 for random)
# OLLAMA_LLM_SEED=-1

# ollama_llm -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_LLM_TEMPERATURE=0.8

# ollama_llm -- Top-k sampling parameter (0 = disabled)
# OLLAMA_LLM_TOP_K=40

# ollama_llm -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_LLM_TOP_P=0.9

# ollama_llm -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_LLM_TFS_Z=1.0

# ollama_llm -- Typical probability mass (1.0 = disabled)
# OLLAMA_LLM_TYPICAL_P=1.0

# ollama_llm -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_LLM_MIN_P=0.0

# ollama_llm -- Number of tokens to consider for repetition penalty
# OLLAMA_LLM_REPEAT_LAST_N=64

# ollama_llm -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_LLM_REPEAT_PENALTY=1.1

# ollama_llm -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_LLM_PRESENCE_PENALTY=0.0

# ollama_llm -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_LLM_FREQUENCY_PENALTY=0.0

# ollama_llm -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_LLM_MIROSTAT=0

# ollama_llm -- Mirostat target entropy
# OLLAMA_LLM_MIROSTAT_TAU=5.0

# ollama_llm -- Mirostat learning rate
# OLLAMA_LLM_MIROSTAT_ETA=0.1

# ollama_llm -- Enable NUMA optimization
# OLLAMA_LLM_NUMA=False

# ollama_llm -- Batch size for processing
# OLLAMA_LLM_NUM_BATCH=512

# ollama_llm -- Number of GPUs to use (-1 for auto)
# OLLAMA_LLM_NUM_GPU=-1

# ollama_llm -- Main GPU index
# OLLAMA_LLM_MAIN_GPU=0

# ollama_llm -- Optimize for low VRAM
# OLLAMA_LLM_LOW_VRAM=False

# ollama_llm -- Number of CPU threads (0 for auto)
# OLLAMA_LLM_NUM_THREAD=0

# ollama_llm -- Use half-precision for key/value cache
# OLLAMA_LLM_F16_KV=True

# ollama_llm -- Return logits for all tokens
# OLLAMA_LLM_LOGITS_ALL=False

# ollama_llm -- Only load vocabulary
# OLLAMA_LLM_VOCAB_ONLY=False

# ollama_llm -- Use memory mapping for model files
# OLLAMA_LLM_USE_MMAP=True

# ollama_llm -- Lock model in memory
# OLLAMA_LLM_USE_MLOCK=False

# ollama_llm -- Only use for embeddings
# OLLAMA_LLM_EMBEDDING_ONLY=False

# ollama_llm -- Penalize newline tokens
# OLLAMA_LLM_PENALIZE_NEWLINE=True

# ollama_llm -- Stop sequences (comma-separated string)
# OLLAMA_LLM_STOP=
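# Illustrative example (hypothetical sequences, not generated defaults): the
# stop option takes a comma-separated string, so stopping generation at an
# end-of-sequence tag or a "User:" turn marker might look like:
#
# OLLAMA_LLM_STOP=</s>,User: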
#
# End of .env entries for LightRAG binding options
################################################################################