diff --git a/env.example b/env.example
index 534bd22a..4590ceee 100644
--- a/env.example
+++ b/env.example
@@ -172,6 +172,8 @@ MAX_PARALLEL_INSERT=2
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
 ### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
+### If LightRAG is deployed in Docker:
+### use host.docker.internal instead of localhost in LLM_BINDING_HOST
 ###########################################################################
 ### LLM request timeout setting for all llm (0 means no timeout for Ollma)
 # LLM_TIMEOUT=180
@@ -181,7 +183,7 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
 
-### Optional for Azure
+### Env vars for Azure OpenAI
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
 # AZURE_OPENAI_DEPLOYMENT=gpt-4o
 
@@ -196,22 +198,16 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_MODEL=gemini-flash-latest
 # LLM_BINDING_API_KEY=your_gemini_api_key
 # LLM_BINDING_HOST=https://generativelanguage.googleapis.com
-GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+
+### use the following command to see all supported options for Gemini
+### lightrag-server --llm-binding gemini --help
+### Gemini Specific Parameters
 # GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
 # GEMINI_LLM_TEMPERATURE=0.7
-
-### OpenAI Compatible API Specific Parameters
-### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
-# OPENAI_LLM_TEMPERATURE=0.9
-### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
-### Typically, max_tokens does not include prompt content, though some models, such as Gemini Models, are exceptions
-### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
-# OPENAI_LLM_MAX_TOKENS=9000
-### For OpenAI o1-mini or newer modles
-OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
-
-#### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
-# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+### Enable Thinking
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
+### Disable Thinking
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
 
 ### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
 ### lightrag-server --llm-binding openai --help
@@ -222,8 +218,17 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 ### Qwen3 Specific Parameters deploy by vLLM
 # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
 
+### OpenAI Compatible API Specific Parameters
+### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
+# OPENAI_LLM_TEMPERATURE=0.9
+### Set max_tokens to mitigate endless output from some LLMs (keep it below LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
+### Typically, max_tokens does not include prompt content
+### For vLLM/SGLang-deployed models, or most OpenAI-compatible API providers
+# OPENAI_LLM_MAX_TOKENS=9000
+### For OpenAI o1-mini or newer models, use max_completion_tokens instead of max_tokens
+OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+
 ### use the following command to see all support options for Ollama LLM
-### If LightRAG deployed in Docker uses host.docker.internal instead of localhost in LLM_BINDING_HOST
 ### lightrag-server --llm-binding ollama --help
 ### Ollama Server Specific Parameters
 ### OLLAMA_LLM_NUM_CTX must be provided, and should at least larger than MAX_TOTAL_TOKENS + 2000
@@ -240,6 +245,8 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
 ### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
+### If LightRAG is deployed in Docker:
+### use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
 #######################################################################################
 # EMBEDDING_TIMEOUT=30
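
For reference, a minimal sketch of the resulting .env settings when the LightRAG container talks to an Ollama server running on the Docker host. The model names are illustrative placeholders, and the port assumes Ollama's default of 11434; only variables named in this diff are used otherwise:

    ### LLM via Ollama on the Docker host. localhost would resolve to the
    ### LightRAG container itself, hence host.docker.internal (on Linux this
    ### may additionally require --add-host=host.docker.internal:host-gateway)
    LLM_BINDING=ollama
    LLM_MODEL=qwen3:30b                 # illustrative model name
    LLM_BINDING_HOST=http://host.docker.internal:11434
    ### Must be at least MAX_TOTAL_TOKENS + 2000 per the comment above
    OLLAMA_LLM_NUM_CTX=32768

    ### The same host rule applies to the embedding endpoint
    EMBEDDING_BINDING=ollama
    EMBEDDING_BINDING_HOST=http://host.docker.internal:11434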