cherry-pick 49fb11e2

2025-12-04 19:14:32 +08:00 · 2025-12-04 19:14:32 +08:00 · de4a73778d
commit de4a73778d
parent 900c77e36c
1 changed files with 142 additions and 48 deletions
--- a/env.example
+++ b/env.example
@ -29,7 +29,7 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # OLLAMA_EMULATING_MODEL_NAME=lightrag
 OLLAMA_EMULATING_MODEL_TAG=latest

-### Max nodes return from graph retrieval in webui
+### Max nodes for graph retrieval (the WebUI local setting is capped at this value, so update it as well)
 # MAX_GRAPH_NODES=1000

 ### Logging level
@ -50,6 +50,8 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # JWT_ALGORITHM=HS256

 ### API-Key to access LightRAG Server API
+### Use this key in HTTP requests with the 'X-API-Key' header
+### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*

@ -74,11 +76,6 @@ ENABLE_LLM_CACHE=true
 ### control the maximum tokens send to LLM (include entities, relations and chunks)
 # MAX_TOTAL_TOKENS=30000

-### maximum number of related chunks per source entity or relation
-###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
-###     Higher values increase re-ranking time
-# RELATED_CHUNK_NUMBER=5
-
 ### chunk selection strategies
 ###     VECTOR: Pick KG chunks by vector similarity, delivered chunks to the LLM aligning more closely with naive retrieval
 ###     WEIGHT: Pick KG chunks by entity and chunk weight, delivered more solely KG related chunks to the LLM
@ -124,6 +121,9 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Document processing output language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English

+### PDF decryption password for protected PDF files
+# PDF_DECRYPT_PASSWORD=your_pdf_password_here
+
 ### Entity types that the LLM will attempt to recognize
 # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'

@ -140,6 +140,22 @@ SUMMARY_LANGUAGE=English
 ### Maximum context size sent to LLM for description summary
 # SUMMARY_CONTEXT_SIZE=12000

+### control the maximum chunk_ids stored in vector and graph db
+# MAX_SOURCE_IDS_PER_ENTITY=300
+# MAX_SOURCE_IDS_PER_RELATION=300
+### control chunk_ids limitation method: FIFO, KEEP
+###    FIFO: First in first out
+###    KEEP: Keep oldest (fewer merge actions, faster)
+# SOURCE_IDS_LIMIT_METHOD=FIFO
+
+### Maximum number of file paths stored in the entity/relation file_path field (for display only; does not affect query performance)
+# MAX_FILE_PATHS=100
+
+### maximum number of related chunks per source entity or relation
+###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
+###     Higher values increase re-ranking time
+# RELATED_CHUNK_NUMBER=5
+
 ###############################
 ### Concurrency Configuration
 ###############################
@ -152,10 +168,13 @@ MAX_PARALLEL_INSERT=2
 ### Num of chunks send to Embedding in single request
 # EMBEDDING_BATCH_NUM=10

-###########################################################
+###########################################################################
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
-###########################################################
+### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM services
+### If LightRAG is deployed in Docker:
+###    use host.docker.internal instead of localhost in LLM_BINDING_HOST
+###########################################################################
 ### LLM request timeout setting for all llm (0 means no timeout for Ollma)
 # LLM_TIMEOUT=180

@ -164,9 +183,13 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key

-### Optional for Azure
+### Azure OpenAI example
+# LLM_BINDING=azure_openai
+# LLM_BINDING_HOST=https://xxxx.openai.azure.com/
+# LLM_BINDING_API_KEY=your_api_key
+### Use deployment name as model name or set AZURE_OPENAI_DEPLOYMENT instead
+# LLM_MODEL=gpt-5-mini
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
-# AZURE_OPENAI_DEPLOYMENT=gpt-4o

 ### Openrouter example
 # LLM_MODEL=google/gemini-2.5-flash
@ -179,22 +202,16 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_MODEL=gemini-flash-latest
 # LLM_BINDING_API_KEY=your_gemini_api_key
 # LLM_BINDING_HOST=https://generativelanguage.googleapis.com
-GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+
+### use the following command to see all supported options for Gemini
+### lightrag-server --llm-binding gemini --help
+### Gemini Specific Parameters
 # GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
 # GEMINI_LLM_TEMPERATURE=0.7
-
-### OpenAI Compatible API Specific Parameters
-### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
-# OPENAI_LLM_TEMPERATURE=0.9
-### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
-### Typically, max_tokens does not include prompt content, though some models, such as Gemini Models, are exceptions
-### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
-# OPENAI_LLM_MAX_TOKENS=9000
-### For OpenAI o1-mini or newer modles
-OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
-
-#### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
-# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+### Enable Thinking
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
+### Disable Thinking
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'

 ### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
 ### lightrag-server --llm-binding openai --help
@ -205,6 +222,16 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 ### Qwen3 Specific Parameters deploy by vLLM
 # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'

+### OpenAI Compatible API Specific Parameters
+### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
+# OPENAI_LLM_TEMPERATURE=0.9
+### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
+### Typically, max_tokens does not include prompt content
+### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
+# OPENAI_LLM_MAX_TOKENS=9000
+### For OpenAI o1-mini or newer models, which use max_completion_tokens instead of max_tokens
+OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+
 ### use the following command to see all support options for Ollama LLM
 ### lightrag-server --llm-binding ollama --help
 ### Ollama Server Specific Parameters
@ -218,30 +245,56 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock Specific Parameters
 # BEDROCK_LLM_TEMPERATURE=1.0

-####################################################################################
+#######################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
-####################################################################################
+### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other embedding services
+### If LightRAG is deployed in Docker:
+###    use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
+#######################################################################################
 # EMBEDDING_TIMEOUT=30
-EMBEDDING_BINDING=ollama
-EMBEDDING_MODEL=bge-m3:latest
-EMBEDDING_DIM=1024
-EMBEDDING_BINDING_API_KEY=your_api_key
-# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
-EMBEDDING_BINDING_HOST=http://localhost:11434

-### OpenAI compatible (VoyageAI embedding openai compatible)
-# EMBEDDING_BINDING=openai
-# EMBEDDING_MODEL=text-embedding-3-large
-# EMBEDDING_DIM=3072
-# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+### EMBEDDING_SEND_DIM: control whether to send the embedding_dim parameter to the embedding API
+### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
+### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
+### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
+### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
+
+# Ollama embedding
+# EMBEDDING_BINDING=ollama
+# EMBEDDING_MODEL=bge-m3:latest
+# EMBEDDING_DIM=1024
 # EMBEDDING_BINDING_API_KEY=your_api_key
+### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost
+# EMBEDDING_BINDING_HOST=http://localhost:11434

-### Optional for Azure
-# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
-# AZURE_EMBEDDING_API_VERSION=2023-05-15
-# AZURE_EMBEDDING_ENDPOINT=your_endpoint
+### OpenAI compatible embedding
+EMBEDDING_BINDING=openai
+EMBEDDING_MODEL=text-embedding-3-large
+EMBEDDING_DIM=3072
+EMBEDDING_SEND_DIM=false
+EMBEDDING_TOKEN_LIMIT=8192
+EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+EMBEDDING_BINDING_API_KEY=your_api_key
+
+### Optional for Azure embedding
+# EMBEDDING_BINDING=azure_openai
+# EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
 # AZURE_EMBEDDING_API_KEY=your_api_key
+# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
+### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
+# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
+# EMBEDDING_DIM=3072
+
+### Gemini embedding
+# EMBEDDING_BINDING=gemini
+# EMBEDDING_MODEL=gemini-embedding-001
+# EMBEDDING_DIM=1536
+# EMBEDDING_TOKEN_LIMIT=2048
+# EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
+# EMBEDDING_BINDING_API_KEY=your_api_key
+### Gemini embedding requires sending dimension to server
+# EMBEDDING_SEND_DIM=true

 ### Jina AI Embedding
 # EMBEDDING_BINDING=jina
@ -303,14 +356,18 @@ POSTGRES_USER=your_username
 POSTGRES_PASSWORD='your_password'
 POSTGRES_DATABASE=your_database
 POSTGRES_MAX_CONNECTIONS=12
-# POSTGRES_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### POSTGRES_WORKSPACE=forced_workspace_name

 ### PostgreSQL Vector Storage Configuration
-### Vector storage type: HNSW, IVFFlat
+### Vector storage type: HNSW, IVFFlat, VCHORDRQ
 POSTGRES_VECTOR_INDEX_TYPE=HNSW
 POSTGRES_HNSW_M=16
 POSTGRES_HNSW_EF=200
 POSTGRES_IVFFLAT_LISTS=100
+POSTGRES_VCHORDRQ_BUILD_OPTIONS=
+POSTGRES_VCHORDRQ_PROBES=
+POSTGRES_VCHORDRQ_EPSILON=1.9

 ### PostgreSQL Connection Retry Configuration (Network Robustness)
 ### Number of retry attempts (1-10, default: 3)
@ -349,7 +406,8 @@ NEO4J_MAX_TRANSACTION_RETRY_TIME=30
 NEO4J_MAX_CONNECTION_LIFETIME=300
 NEO4J_LIVENESS_CHECK_TIMEOUT=30
 NEO4J_KEEP_ALIVE=true
-# NEO4J_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### NEO4J_WORKSPACE=forced_workspace_name

 ### MongoDB Configuration
 MONGO_URI=mongodb://root:root@localhost:27017/
@ -363,12 +421,14 @@ MILVUS_DB_NAME=lightrag
 # MILVUS_USER=root
 # MILVUS_PASSWORD=your_password
 # MILVUS_TOKEN=your_token
-# MILVUS_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### MILVUS_WORKSPACE=forced_workspace_name

 ### Qdrant
 QDRANT_URL=http://localhost:6333
 # QDRANT_API_KEY=your-api-key
-# QDRANT_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### QDRANT_WORKSPACE=forced_workspace_name

 ### Redis
 REDIS_URI=redis://localhost:6379
@ -376,11 +436,45 @@ REDIS_SOCKET_TIMEOUT=30
 REDIS_CONNECT_TIMEOUT=10
 REDIS_MAX_CONNECTIONS=100
 REDIS_RETRY_ATTEMPTS=3
-# REDIS_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### REDIS_WORKSPACE=forced_workspace_name

 ### Memgraph Configuration
 MEMGRAPH_URI=bolt://localhost:7687
 MEMGRAPH_USERNAME=
 MEMGRAPH_PASSWORD=
 MEMGRAPH_DATABASE=memgraph
-# MEMGRAPH_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### MEMGRAPH_WORKSPACE=forced_workspace_name
+
+############################
+### Evaluation Configuration
+############################
+### RAGAS evaluation models (used for RAG quality assessment)
+### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
+### Default uses OpenAI models for evaluation
+
+### LLM Configuration for Evaluation
+# EVAL_LLM_MODEL=gpt-4o-mini
+### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
+# EVAL_LLM_BINDING_API_KEY=your_api_key
+### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
+# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
+
+### Embedding Configuration for Evaluation
+# EVAL_EMBEDDING_MODEL=text-embedding-3-large
+### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
+# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
+### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
+# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+
+### Performance Tuning
+### Number of concurrent test case evaluations
+### Lower values reduce API rate limit issues but increase evaluation time
+# EVAL_MAX_CONCURRENT=2
+### TOP_K query parameter of LightRAG (default: 10)
+### Number of entities or relations retrieved from KG
+# EVAL_QUERY_TOP_K=10
+### LLM request retry and timeout settings for evaluation
+# EVAL_LLM_MAX_RETRIES=5
+# EVAL_LLM_TIMEOUT=180