cherry-pick 49fb11e2

2025-12-04 19:14:32 +08:00 · 2025-12-04 19:14:32 +08:00 · de4a73778d
commit de4a73778d
parent 900c77e36c
1 changed files with 142 additions and 48 deletions
--- a/env.example
+++ b/env.example
@ -29,7 +29,7 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # OLLAMA_EMULATING_MODEL_NAME=lightrag
 OLLAMA_EMULATING_MODEL_TAG=latest
-### Max nodes return from graph retrieval in webui
+### Max nodes for graph retrieval (also update the WebUI local setting, which is capped at this value)
 # MAX_GRAPH_NODES=1000
 ### Logging level
@ -50,6 +50,8 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # JWT_ALGORITHM=HS256
 ### API-Key to access LightRAG Server API
 ### Use this key in HTTP requests with the 'X-API-Key' header
 ### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*
@ -74,11 +76,6 @@ ENABLE_LLM_CACHE=true
 ### control the maximum tokens sent to LLM (including entities, relations and chunks)
 # MAX_TOTAL_TOKENS=30000
 ### maximum number of related chunks per source entity or relation
 ###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
 ###     Higher values increase re-ranking time
 # RELATED_CHUNK_NUMBER=5
 ### chunk selection strategies
 ###     VECTOR: Pick KG chunks by vector similarity; the chunks delivered to the LLM align more closely with naive retrieval
 ###     WEIGHT: Pick KG chunks by entity and chunk weight; the chunks delivered to the LLM are more purely KG-related
@ -124,6 +121,9 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Document processing output language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English
 ### PDF decryption password for protected PDF files
 # PDF_DECRYPT_PASSWORD=your_pdf_password_here
 ### Entity types that the LLM will attempt to recognize
 # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
@ -140,6 +140,22 @@ SUMMARY_LANGUAGE=English
 ### Maximum context size sent to LLM for description summary
 # SUMMARY_CONTEXT_SIZE=12000
 ### control the maximum chunk_ids stored in vector and graph db
 # MAX_SOURCE_IDS_PER_ENTITY=300
 # MAX_SOURCE_IDS_PER_RELATION=300
 ### control chunk_ids limitation method: FIFO, KEEP
 ###    FIFO: First in first out
 ###    KEEP: Keep oldest (less merge action and faster)
 # SOURCE_IDS_LIMIT_METHOD=FIFO
 # Maximum number of file paths stored in entity/relation file_path field (for display only, does not affect query performance)
 # MAX_FILE_PATHS=100
 ### maximum number of related chunks per source entity or relation
 ###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
 ###     Higher values increase re-ranking time
 # RELATED_CHUNK_NUMBER=5
 ###############################
 ### Concurrency Configuration
 ###############################
@ -152,10 +168,13 @@ MAX_PARALLEL_INSERT=2
 ### Number of chunks sent to Embedding in a single request
 # EMBEDDING_BATCH_NUM=10
-###########################################################
+###########################################################################
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
-###########################################################
+### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
 ### If LightRAG deployed in Docker:
 ###    uses host.docker.internal instead of localhost in LLM_BINDING_HOST
 ###########################################################################
 ### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
 # LLM_TIMEOUT=180
@ -164,9 +183,13 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Optional for Azure
+### Azure OpenAI example
 # LLM_BINDING=azure_openai
 # LLM_BINDING_HOST=https://xxxx.openai.azure.com/
 # LLM_BINDING_API_KEY=your_api_key
 ### Use deployment name as model name or set AZURE_OPENAI_DEPLOYMENT instead
 # LLM_MODEL=gpt-5-mini
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
 # AZURE_OPENAI_DEPLOYMENT=gpt-4o
 ### Openrouter example
 # LLM_MODEL=google/gemini-2.5-flash
@ -179,22 +202,16 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_MODEL=gemini-flash-latest
 # LLM_BINDING_API_KEY=your_gemini_api_key
 # LLM_BINDING_HOST=https://generativelanguage.googleapis.com
-GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+
 ### use the following command to see all supported options for Gemini
 ### lightrag-server --llm-binding gemini --help
 ### Gemini Specific Parameters
 # GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
 # GEMINI_LLM_TEMPERATURE=0.7
-
+### Enable Thinking
-### OpenAI Compatible API Specific Parameters
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
-### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
+### Disable Thinking
-# OPENAI_LLM_TEMPERATURE=0.9
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
 ### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
 ### Typically, max_tokens does not include prompt content, though some models, such as Gemini Models, are exceptions
 ### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
 # OPENAI_LLM_MAX_TOKENS=9000
 ### For OpenAI o1-mini or newer models
 OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 #### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
 # OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 ### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
 ### lightrag-server --llm-binding openai --help
@ -205,6 +222,16 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 ### Qwen3 Specific Parameters deploy by vLLM
 # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
 ### OpenAI Compatible API Specific Parameters
 ### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
 # OPENAI_LLM_TEMPERATURE=0.9
 ### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
 ### Typically, max_tokens does not include prompt content
 ### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
 # OPENAI_LLM_MAX_TOKENS=9000
 ### For OpenAI o1-mini or newer models, which utilize max_completion_tokens instead of max_tokens
 OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 ### use the following command to see all support options for Ollama LLM
 ### lightrag-server --llm-binding ollama --help
 ### Ollama Server Specific Parameters
@ -218,30 +245,56 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock Specific Parameters
 # BEDROCK_LLM_TEMPERATURE=1.0
-####################################################################################
+#######################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
-####################################################################################
+### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
 ### If LightRAG deployed in Docker:
 ###    uses host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
 #######################################################################################
 # EMBEDDING_TIMEOUT=30
 EMBEDDING_BINDING=ollama
 EMBEDDING_MODEL=bge-m3:latest
 EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
-### OpenAI compatible (VoyageAI embedding openai compatible)
+### Control whether to send embedding_dim parameter to embedding API
-# EMBEDDING_BINDING=openai
+### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
-# EMBEDDING_MODEL=text-embedding-3-large
+### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
-# EMBEDDING_DIM=3072
+### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
-# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
 # Ollama embedding
 # EMBEDDING_BINDING=ollama
 # EMBEDDING_MODEL=bge-m3:latest
 # EMBEDDING_DIM=1024
 # EMBEDDING_BINDING_API_KEY=your_api_key
 ### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost
 # EMBEDDING_BINDING_HOST=http://localhost:11434
-### Optional for Azure
+### OpenAI compatible embedding
-# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
+EMBEDDING_BINDING=openai
-# AZURE_EMBEDDING_API_VERSION=2023-05-15
+EMBEDDING_MODEL=text-embedding-3-large
-# AZURE_EMBEDDING_ENDPOINT=your_endpoint
+EMBEDDING_DIM=3072
 EMBEDDING_SEND_DIM=false
 EMBEDDING_TOKEN_LIMIT=8192
 EMBEDDING_BINDING_HOST=https://api.openai.com/v1
 EMBEDDING_BINDING_API_KEY=your_api_key
 ### Optional for Azure embedding
 # EMBEDDING_BINDING=azure_openai
 # EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
 # AZURE_EMBEDDING_API_KEY=your_api_key
 # AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
 # Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
 # AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
 # EMBEDDING_DIM=3072
 ### Gemini embedding
 # EMBEDDING_BINDING=gemini
 # EMBEDDING_MODEL=gemini-embedding-001
 # EMBEDDING_DIM=1536
 # EMBEDDING_TOKEN_LIMIT=2048
 # EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
 # EMBEDDING_BINDING_API_KEY=your_api_key
 ### Gemini embedding requires sending dimension to server
 # EMBEDDING_SEND_DIM=true
 ### Jina AI Embedding
 # EMBEDDING_BINDING=jina
@ -303,14 +356,18 @@ POSTGRES_USER=your_username
 POSTGRES_PASSWORD='your_password'
 POSTGRES_DATABASE=your_database
 POSTGRES_MAX_CONNECTIONS=12
-# POSTGRES_WORKSPACE=forced_workspace_name
+### DB specific workspace should not be set, kept for compatibility only
 ### POSTGRES_WORKSPACE=forced_workspace_name
 ### PostgreSQL Vector Storage Configuration
-### Vector storage type: HNSW, IVFFlat
+### Vector storage type: HNSW, IVFFlat, VCHORDRQ
 POSTGRES_VECTOR_INDEX_TYPE=HNSW
 POSTGRES_HNSW_M=16
 POSTGRES_HNSW_EF=200
 POSTGRES_IVFFLAT_LISTS=100
 POSTGRES_VCHORDRQ_BUILD_OPTIONS=
 POSTGRES_VCHORDRQ_PROBES=
 POSTGRES_VCHORDRQ_EPSILON=1.9
 ### PostgreSQL Connection Retry Configuration (Network Robustness)
 ### Number of retry attempts (1-10, default: 3)
@ -349,7 +406,8 @@ NEO4J_MAX_TRANSACTION_RETRY_TIME=30
 NEO4J_MAX_CONNECTION_LIFETIME=300
 NEO4J_LIVENESS_CHECK_TIMEOUT=30
 NEO4J_KEEP_ALIVE=true
-# NEO4J_WORKSPACE=forced_workspace_name
+### DB specific workspace should not be set, kept for compatibility only
 ### NEO4J_WORKSPACE=forced_workspace_name
 ### MongoDB Configuration
 MONGO_URI=mongodb://root:root@localhost:27017/
@ -363,12 +421,14 @@ MILVUS_DB_NAME=lightrag
 # MILVUS_USER=root
 # MILVUS_PASSWORD=your_password
 # MILVUS_TOKEN=your_token
-# MILVUS_WORKSPACE=forced_workspace_name
+### DB specific workspace should not be set, kept for compatibility only
 ### MILVUS_WORKSPACE=forced_workspace_name
 ### Qdrant
 QDRANT_URL=http://localhost:6333
 # QDRANT_API_KEY=your-api-key
-# QDRANT_WORKSPACE=forced_workspace_name
+### DB specific workspace should not be set, kept for compatibility only
 ### QDRANT_WORKSPACE=forced_workspace_name
 ### Redis
 REDIS_URI=redis://localhost:6379
@ -376,11 +436,45 @@ REDIS_SOCKET_TIMEOUT=30
 REDIS_CONNECT_TIMEOUT=10
 REDIS_MAX_CONNECTIONS=100
 REDIS_RETRY_ATTEMPTS=3
-# REDIS_WORKSPACE=forced_workspace_name
+### DB specific workspace should not be set, kept for compatibility only
 ### REDIS_WORKSPACE=forced_workspace_name
 ### Memgraph Configuration
 MEMGRAPH_URI=bolt://localhost:7687
 MEMGRAPH_USERNAME=
 MEMGRAPH_PASSWORD=
 MEMGRAPH_DATABASE=memgraph
-# MEMGRAPH_WORKSPACE=forced_workspace_name
+### DB specific workspace should not be set, kept for compatibility only
 ### MEMGRAPH_WORKSPACE=forced_workspace_name
 ############################
 ### Evaluation Configuration
 ############################
 ### RAGAS evaluation models (used for RAG quality assessment)
 ### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
 ### Default uses OpenAI models for evaluation
 ### LLM Configuration for Evaluation
 # EVAL_LLM_MODEL=gpt-4o-mini
 ### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
 # EVAL_LLM_BINDING_API_KEY=your_api_key
 ### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
 # EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
 ### Embedding Configuration for Evaluation
 # EVAL_EMBEDDING_MODEL=text-embedding-3-large
 ### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
 # EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
 ### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
 # EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
 ### Performance Tuning
 ### Number of concurrent test case evaluations
 ### Lower values reduce API rate limit issues but increase evaluation time
 # EVAL_MAX_CONCURRENT=2
 ### TOP_K query parameter of LightRAG (default: 10)
 ### Number of entities or relations retrieved from KG
 # EVAL_QUERY_TOP_K=10
 ### LLM request retry and timeout settings for evaluation
 # EVAL_LLM_MAX_RETRIES=5
 # EVAL_LLM_TIMEOUT=180