cherry-pick d0ae7a67

2025-12-04 19:18:15 +08:00 · 2025-12-04 19:18:15 +08:00 · 2d4c56433e
commit 2d4c56433e
parent c9e5988349
1 changed files with 47 additions and 150 deletions
--- a/env.example
+++ b/env.example
@ -23,13 +23,13 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # WORKING_DIR=<absolute_path_for_working_dir>
 ### Tiktoken cache directory (Store cached files in this folder for offline deployment)
-# TIKTOKEN_CACHE_DIR=/app/data/tiktoken
+# TIKTOKEN_CACHE_DIR=./temp/tiktoken
 ### Ollama Emulating Model and Tag
 # OLLAMA_EMULATING_MODEL_NAME=lightrag
 OLLAMA_EMULATING_MODEL_TAG=latest
-### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value)
+### Max nodes returned from graph retrieval in the WebUI
 # MAX_GRAPH_NODES=1000
 ### Logging level
@ -50,8 +50,6 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # JWT_ALGORITHM=HS256
 ### API-Key to access LightRAG Server API
 ### Use this key in HTTP requests with the 'X-API-Key' header
 ### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*
@ -76,6 +74,11 @@ ENABLE_LLM_CACHE=true
 ### control the maximum tokens sent to LLM (including entities, relations and chunks)
 # MAX_TOTAL_TOKENS=30000
 ### maximum number of related chunks per source entity or relation
 ###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
 ###     Higher values increase re-ranking time
 # RELATED_CHUNK_NUMBER=5
 ### chunk selection strategies
 ###     VECTOR: Pick KG chunks by vector similarity; the chunks delivered to the LLM align more closely with naive retrieval
 ###     WEIGHT: Pick KG chunks by entity and chunk weight; the chunks delivered to the LLM are more purely KG related
@ -121,9 +124,6 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Document processing output language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English
 ### PDF decryption password for protected PDF files
 # PDF_DECRYPT_PASSWORD=your_pdf_password_here
 ### Entity types that the LLM will attempt to recognize
 # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
@ -140,22 +140,6 @@ SUMMARY_LANGUAGE=English
 ### Maximum context size sent to LLM for description summary
 # SUMMARY_CONTEXT_SIZE=12000
 ### control the maximum chunk_ids stored in vector and graph db
 # MAX_SOURCE_IDS_PER_ENTITY=300
 # MAX_SOURCE_IDS_PER_RELATION=300
 ### control chunk_ids limitation method: FIFO, KEEP
 ###    FIFO: First in first out
 ###    KEEP: Keep oldest (less merge action and faster)
 # SOURCE_IDS_LIMIT_METHOD=FIFO
 # Maximum number of file paths stored in entity/relation file_path field (for display only, does not affect query performance)
 # MAX_FILE_PATHS=100
 ### maximum number of related chunks per source entity or relation
 ###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
 ###     Higher values increase re-ranking time
 # RELATED_CHUNK_NUMBER=5
 ###############################
 ### Concurrency Configuration
 ###############################
@ -168,13 +152,10 @@ MAX_PARALLEL_INSERT=2
 ### Number of chunks sent to Embedding in a single request
 # EMBEDDING_BATCH_NUM=10
-###########################################################################
+###########################################################
 ### LLM Configuration
-### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
+### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
-### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
+###########################################################
 ### If LightRAG deployed in Docker:
 ###    uses host.docker.internal instead of localhost in LLM_BINDING_HOST
 ###########################################################################
 ### LLM request timeout setting for all llm (0 means no timeout for Ollama)
 # LLM_TIMEOUT=180
@ -183,13 +164,9 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Azure OpenAI example
+### Optional for Azure
 ### Use deployment name as model name or set AZURE_OPENAI_DEPLOYMENT instead
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
-# LLM_BINDING=azure_openai
+# AZURE_OPENAI_DEPLOYMENT=gpt-4o
 # LLM_BINDING_HOST=https://xxxx.openai.azure.com/
 # LLM_BINDING_API_KEY=your_api_key
 # LLM_MODEL=my-gpt-mini-deployment
 ### Openrouter example
 # LLM_MODEL=google/gemini-2.5-flash
@ -197,21 +174,18 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING=openai
-### Gemini example
+### OpenAI Compatible API Specific Parameters
-# LLM_BINDING=gemini
+### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
-# LLM_MODEL=gemini-flash-latest
+# OPENAI_LLM_TEMPERATURE=0.9
-# LLM_BINDING_API_KEY=your_gemini_api_key
+### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
-# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
+### Typically, max_tokens does not include prompt content, though some models, such as Gemini Models, are exceptions
 ### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
 # OPENAI_LLM_MAX_TOKENS=9000
 ### For OpenAI o1-mini or newer models
 OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
-### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
+#### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
-### lightrag-server --llm-binding gemini --help
+# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 ### Gemini Specific Parameters
 # GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
 # GEMINI_LLM_TEMPERATURE=0.7
 ### Enable Thinking
 # GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
 ### Disable Thinking
 # GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
 ### use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
 ### lightrag-server --llm-binding openai --help
@ -222,16 +196,6 @@ LLM_BINDING_API_KEY=your_api_key
 ### Qwen3 Specific Parameters deploy by vLLM
 # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
 ### OpenAI Compatible API Specific Parameters
 ### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
 # OPENAI_LLM_TEMPERATURE=0.9
 ### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
 ### Typically, max_tokens does not include prompt content
 ### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
 # OPENAI_LLM_MAX_TOKENS=9000
 ### For OpenAI o1-mini or newer models, which use max_completion_tokens instead of max_tokens
 OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 ### use the following command to see all supported options for Ollama LLM
 ### lightrag-server --llm-binding ollama --help
 ### Ollama Server Specific Parameters
@ -245,56 +209,30 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock Specific Parameters
 # BEDROCK_LLM_TEMPERATURE=1.0
-#######################################################################################
+####################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
-### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
+####################################################################################
 ### If LightRAG deployed in Docker:
 ###    uses host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
 #######################################################################################
 # EMBEDDING_TIMEOUT=30
-
+EMBEDDING_BINDING=ollama
-### Control whether to send embedding_dim parameter to embedding API
+EMBEDDING_MODEL=bge-m3:latest
-### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
+EMBEDDING_DIM=1024
 ### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
 ### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
 ### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
 # Ollama embedding
 # EMBEDDING_BINDING=ollama
 # EMBEDDING_MODEL=bge-m3:latest
 # EMBEDDING_DIM=1024
 # EMBEDDING_BINDING_API_KEY=your_api_key
 ### If LightRAG deployed in Docker uses host.docker.internal instead of localhost
 # EMBEDDING_BINDING_HOST=http://localhost:11434
 ### OpenAI compatible embedding
 EMBEDDING_BINDING=openai
 EMBEDDING_MODEL=text-embedding-3-large
 EMBEDDING_DIM=3072
 EMBEDDING_SEND_DIM=false
 EMBEDDING_TOKEN_LIMIT=8192
 EMBEDDING_BINDING_HOST=https://api.openai.com/v1
 EMBEDDING_BINDING_API_KEY=your_api_key
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
-### Optional for Azure embedding
+### OpenAI compatible (VoyageAI embedding openai compatible)
-### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
+# EMBEDDING_BINDING=openai
-# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
+# EMBEDDING_MODEL=text-embedding-3-large
 # EMBEDDING_BINDING=azure_openai
 # EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
 # EMBEDDING_API_KEY=your_api_key
 # EMBEDDING_MODEL=my-text-embedding-3-large-deployment
 # EMBEDDING_DIM=3072
-
+# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
 ### Gemini embedding
 # EMBEDDING_BINDING=gemini
 # EMBEDDING_MODEL=gemini-embedding-001
 # EMBEDDING_DIM=1536
 # EMBEDDING_TOKEN_LIMIT=2048
 # EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
 # EMBEDDING_BINDING_API_KEY=your_api_key
-### Gemini embedding requires sending dimension to server
+
-# EMBEDDING_SEND_DIM=true
+### Optional for Azure
 # AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
 # AZURE_EMBEDDING_API_VERSION=2023-05-15
 # AZURE_EMBEDDING_ENDPOINT=your_endpoint
 # AZURE_EMBEDDING_API_KEY=your_api_key
 ### Jina AI Embedding
 # EMBEDDING_BINDING=jina
@ -356,18 +294,14 @@ POSTGRES_USER=your_username
 POSTGRES_PASSWORD='your_password'
 POSTGRES_DATABASE=your_database
 POSTGRES_MAX_CONNECTIONS=12
-### DB specific workspace should not be set, keep for compatible only
+# POSTGRES_WORKSPACE=forced_workspace_name
 ### POSTGRES_WORKSPACE=forced_workspace_name
 ### PostgreSQL Vector Storage Configuration
-### Vector storage type: HNSW, IVFFlat, VCHORDRQ
+### Vector storage type: HNSW, IVFFlat
 POSTGRES_VECTOR_INDEX_TYPE=HNSW
 POSTGRES_HNSW_M=16
 POSTGRES_HNSW_EF=200
 POSTGRES_IVFFLAT_LISTS=100
 POSTGRES_VCHORDRQ_BUILD_OPTIONS=
 POSTGRES_VCHORDRQ_PROBES=
 POSTGRES_VCHORDRQ_EPSILON=1.9
 ### PostgreSQL Connection Retry Configuration (Network Robustness)
 ### Number of retry attempts (1-10, default: 3)
@ -406,8 +340,7 @@ NEO4J_MAX_TRANSACTION_RETRY_TIME=30
 NEO4J_MAX_CONNECTION_LIFETIME=300
 NEO4J_LIVENESS_CHECK_TIMEOUT=30
 NEO4J_KEEP_ALIVE=true
-### DB specific workspace should not be set, keep for compatible only
+# NEO4J_WORKSPACE=forced_workspace_name
 ### NEO4J_WORKSPACE=forced_workspace_name
 ### MongoDB Configuration
 MONGO_URI=mongodb://root:root@localhost:27017/
@ -421,14 +354,12 @@ MILVUS_DB_NAME=lightrag
 # MILVUS_USER=root
 # MILVUS_PASSWORD=your_password
 # MILVUS_TOKEN=your_token
-### DB specific workspace should not be set, keep for compatible only
+# MILVUS_WORKSPACE=forced_workspace_name
 ### MILVUS_WORKSPACE=forced_workspace_name
 ### Qdrant
 QDRANT_URL=http://localhost:6333
 # QDRANT_API_KEY=your-api-key
-### DB specific workspace should not be set, keep for compatible only
+# QDRANT_WORKSPACE=forced_workspace_name
 ### QDRANT_WORKSPACE=forced_workspace_name
 ### Redis
 REDIS_URI=redis://localhost:6379
@ -436,45 +367,11 @@ REDIS_SOCKET_TIMEOUT=30
 REDIS_CONNECT_TIMEOUT=10
 REDIS_MAX_CONNECTIONS=100
 REDIS_RETRY_ATTEMPTS=3
-### DB specific workspace should not be set, keep for compatible only
+# REDIS_WORKSPACE=forced_workspace_name
 ### REDIS_WORKSPACE=forced_workspace_name
 ### Memgraph Configuration
 MEMGRAPH_URI=bolt://localhost:7687
 MEMGRAPH_USERNAME=
 MEMGRAPH_PASSWORD=
 MEMGRAPH_DATABASE=memgraph
-### DB specific workspace should not be set, keep for compatible only
+# MEMGRAPH_WORKSPACE=forced_workspace_name
 ### MEMGRAPH_WORKSPACE=forced_workspace_name
 ############################
 ### Evaluation Configuration
 ############################
 ### RAGAS evaluation models (used for RAG quality assessment)
 ### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
 ### Default uses OpenAI models for evaluation
 ### LLM Configuration for Evaluation
 # EVAL_LLM_MODEL=gpt-4o-mini
 ### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
 # EVAL_LLM_BINDING_API_KEY=your_api_key
 ### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
 # EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
 ### Embedding Configuration for Evaluation
 # EVAL_EMBEDDING_MODEL=text-embedding-3-large
 ### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
 # EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
 ### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
 # EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
 ### Performance Tuning
 ### Number of concurrent test case evaluations
 ### Lower values reduce API rate limit issues but increase evaluation time
 # EVAL_MAX_CONCURRENT=2
 ### TOP_K query parameter of LightRAG (default: 10)
 ### Number of entities or relations retrieved from KG
 # EVAL_QUERY_TOP_K=10
 ### LLM request retry and timeout settings for evaluation
 # EVAL_LLM_MAX_RETRIES=5
 # EVAL_LLM_TIMEOUT=180