diff --git a/env.example b/env.example
index dd3389b2..325ba69c 100644
--- a/env.example
+++ b/env.example
@@ -29,7 +29,7 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # OLLAMA_EMULATING_MODEL_NAME=lightrag
 OLLAMA_EMULATING_MODEL_TAG=latest
-### Max nodes return from graph retrieval in webui
+### Max nodes returned from graph retrieval (the WebUI local setting is capped at this value, so update it as well)
 # MAX_GRAPH_NODES=1000
 ### Logging level
@@ -50,6 +50,8 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # JWT_ALGORITHM=HS256
 ### API-Key to access LightRAG Server API
+### Use this key in HTTP requests with the 'X-API-Key' header
+### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*
@@ -74,11 +76,6 @@ ENABLE_LLM_CACHE=true
 ### control the maximum tokens send to LLM (include entities, relations and chunks)
 # MAX_TOTAL_TOKENS=30000
-### maximum number of related chunks per source entity or relation
-### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
-### Higher values increase re-ranking time
-# RELATED_CHUNK_NUMBER=5
-
 ### chunk selection strategies
 ### VECTOR: Pick KG chunks by vector similarity, delivered chunks to the LLM aligning more closely with naive retrieval
 ### WEIGHT: Pick KG chunks by entity and chunk weight, delivered more solely KG related chunks to the LLM
@@ -124,6 +121,9 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Document processing output language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English
+### PDF decryption password for protected PDF files
+# PDF_DECRYPT_PASSWORD=your_pdf_password_here
+
 ### Entity types that the LLM will attempt to recognize
 # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
@@ -140,6 +140,22 @@ SUMMARY_LANGUAGE=English
 ### Maximum context size sent to LLM for description summary
 # SUMMARY_CONTEXT_SIZE=12000
+### Control the maximum number of chunk_ids stored per entity/relation in the vector and graph DB
+# MAX_SOURCE_IDS_PER_ENTITY=300
+# MAX_SOURCE_IDS_PER_RELATION=300
+### chunk_ids limitation method: FIFO, KEEP
+### FIFO: First in, first out
+### KEEP: Keep the oldest (fewer merge actions, faster)
+# SOURCE_IDS_LIMIT_METHOD=FIFO
+
+# Maximum number of file paths stored in the entity/relation file_path field (for display only, does not affect query performance)
+# MAX_FILE_PATHS=100
+
+### maximum number of related chunks per source entity or relation
+### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
+### Higher values increase re-ranking time
+# RELATED_CHUNK_NUMBER=5
+
 ###############################
 ### Concurrency Configuration
 ###############################
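The X-API-Key header added above is all that protected endpoints need; a minimal query call might look like the sketch below (port 9621 is the server default, and the JSON body with query and mode fields follows the LightRAG Server query API, so adjust it if your deployment differs):

    curl -s -X POST http://localhost:9621/query \
      -H "Content-Type: application/json" \
      -H "X-API-Key: your-secure-api-key-here" \
      -d '{"query": "What is LightRAG?", "mode": "hybrid"}'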
@@ -152,10 +168,13 @@ MAX_PARALLEL_INSERT=2
 ### Num of chunks send to Embedding in single request
 # EMBEDDING_BATCH_NUM=10
-###########################################################
+###########################################################################
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
-###########################################################
+### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM services
+### If LightRAG is deployed in Docker:
+### use host.docker.internal instead of localhost in LLM_BINDING_HOST
+###########################################################################
 ### LLM request timeout setting for all llm (0 means no timeout for Ollma)
 # LLM_TIMEOUT=180
@@ -164,9 +183,13 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Optional for Azure
+### Azure OpenAI example
+# LLM_BINDING=azure_openai
+# LLM_BINDING_HOST=https://xxxx.openai.azure.com/
+# LLM_BINDING_API_KEY=your_api_key
+### Use the deployment name as the model name, or set AZURE_OPENAI_DEPLOYMENT instead
+# LLM_MODEL=gpt-5-mini
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
-# AZURE_OPENAI_DEPLOYMENT=gpt-4o
 ### Openrouter example
 # LLM_MODEL=google/gemini-2.5-flash
@@ -179,22 +202,16 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_MODEL=gemini-flash-latest
 # LLM_BINDING_API_KEY=your_gemini_api_key
 # LLM_BINDING_HOST=https://generativelanguage.googleapis.com
-GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+
+### use the following command to see all supported options for Gemini
+### lightrag-server --llm-binding gemini --help
+### Gemini Specific Parameters
 # GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
 # GEMINI_LLM_TEMPERATURE=0.7
-
-### OpenAI Compatible API Specific Parameters
-### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
-# OPENAI_LLM_TEMPERATURE=0.9
-### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
-### Typically, max_tokens does not include prompt content, though some models, such as Gemini Models, are exceptions
-### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
-# OPENAI_LLM_MAX_TOKENS=9000
-### For OpenAI o1-mini or newer modles
-OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
-
-#### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
-# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+### Enable Thinking
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
+### Disable Thinking
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
 ### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
 ### lightrag-server --llm-binding openai --help
@@ -205,6 +222,16 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
 ### Qwen3 Specific Parameters deploy by vLLM
 # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
+### OpenAI Compatible API Specific Parameters
+### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
+# OPENAI_LLM_TEMPERATURE=0.9
+### Set max_tokens to mitigate endless output from some LLMs (keep it below LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
+### Typically, max_tokens does not include prompt content
+### For vLLM/SGLang deployed models, or most OpenAI compatible API providers
+# OPENAI_LLM_MAX_TOKENS=9000
+### OpenAI o1-mini or newer models use max_completion_tokens instead of max_tokens
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+
 ### use the following command to see all support options for Ollama LLM
 ### lightrag-server --llm-binding ollama --help
 ### Ollama Server Specific Parameters
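For a self-hosted OpenAI-compatible endpoint such as vLLM or SGLang, the settings above combine roughly as in this sketch; the host, port and model name are illustrative assumptions, not project defaults:

    LLM_BINDING=openai
    # hypothetical model served by a local vLLM instance
    LLM_MODEL=Qwen/Qwen3-30B-A3B
    # vLLM's OpenAI-compatible route, assuming the default port 8000
    LLM_BINDING_HOST=http://localhost:8000/v1
    # many local servers accept any non-empty placeholder key
    LLM_BINDING_API_KEY=dummy-key
    OPENAI_LLM_TEMPERATURE=0.9
    OPENAI_LLM_MAX_TOKENS=9000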
@@ -218,30 +245,56 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock Specific Parameters
 # BEDROCK_LLM_TEMPERATURE=1.0
-####################################################################################
+#######################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
-####################################################################################
+### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding services
+### If LightRAG is deployed in Docker:
+### use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
+#######################################################################################
 # EMBEDDING_TIMEOUT=30
-EMBEDDING_BINDING=ollama
-EMBEDDING_MODEL=bge-m3:latest
-EMBEDDING_DIM=1024
-EMBEDDING_BINDING_API_KEY=your_api_key
-# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
-EMBEDDING_BINDING_HOST=http://localhost:11434
-### OpenAI compatible (VoyageAI embedding openai compatible)
-# EMBEDDING_BINDING=openai
-# EMBEDDING_MODEL=text-embedding-3-large
-# EMBEDDING_DIM=3072
-# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+### Control whether to send the embedding_dim parameter to the embedding API
+### IMPORTANT: Jina ALWAYS sends the dimension parameter (API requirement) - this setting is ignored for Jina
+### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
+### For OpenAI: Set to 'false' (default) to disable sending the dimension parameter
+### Note: Automatically ignored for backends that don't support the dimension parameter (e.g., Ollama)
+
+# Ollama embedding
+# EMBEDDING_BINDING=ollama
+# EMBEDDING_MODEL=bge-m3:latest
+# EMBEDDING_DIM=1024
 # EMBEDDING_BINDING_API_KEY=your_api_key
+### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost
+# EMBEDDING_BINDING_HOST=http://localhost:11434
-### Optional for Azure
-# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
-# AZURE_EMBEDDING_API_VERSION=2023-05-15
-# AZURE_EMBEDDING_ENDPOINT=your_endpoint
+### OpenAI compatible embedding
+EMBEDDING_BINDING=openai
+EMBEDDING_MODEL=text-embedding-3-large
+EMBEDDING_DIM=3072
+EMBEDDING_SEND_DIM=false
+EMBEDDING_TOKEN_LIMIT=8192
+EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+EMBEDDING_BINDING_API_KEY=your_api_key
+
+### Optional for Azure embedding
+# EMBEDDING_BINDING=azure_openai
+# EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
 # AZURE_EMBEDDING_API_KEY=your_api_key
+# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
+# Use the deployment name as the model name, or set AZURE_EMBEDDING_DEPLOYMENT instead
+# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
+# EMBEDDING_DIM=3072
+
+### Gemini embedding
+# EMBEDDING_BINDING=gemini
+# EMBEDDING_MODEL=gemini-embedding-001
+# EMBEDDING_DIM=1536
+# EMBEDDING_TOKEN_LIMIT=2048
+# EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
+# EMBEDDING_BINDING_API_KEY=your_api_key
+### Gemini embedding requires sending the dimension to the server
+# EMBEDDING_SEND_DIM=true
 ### Jina AI Embedding
 # EMBEDDING_BINDING=jina
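Before the first ingestion it can be worth probing the embedding endpoint directly; a rough check against the OpenAI example above (text-embedding-3 models accept a "dimensions" field, which is presumably what EMBEDDING_SEND_DIM=true asks LightRAG to include):

    curl -s https://api.openai.com/v1/embeddings \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer $EMBEDDING_BINDING_API_KEY" \
      -d '{"model": "text-embedding-3-large", "input": "ping", "dimensions": 3072}'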
@@ -303,14 +356,18 @@ POSTGRES_USER=your_username
 POSTGRES_PASSWORD='your_password'
 POSTGRES_DATABASE=your_database
 POSTGRES_MAX_CONNECTIONS=12
-# POSTGRES_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### POSTGRES_WORKSPACE=forced_workspace_name
 ### PostgreSQL Vector Storage Configuration
-### Vector storage type: HNSW, IVFFlat
+### Vector storage type: HNSW, IVFFlat, VCHORDRQ
 POSTGRES_VECTOR_INDEX_TYPE=HNSW
 POSTGRES_HNSW_M=16
 POSTGRES_HNSW_EF=200
 POSTGRES_IVFFLAT_LISTS=100
+POSTGRES_VCHORDRQ_BUILD_OPTIONS=
+POSTGRES_VCHORDRQ_PROBES=
+POSTGRES_VCHORDRQ_EPSILON=1.9
 ### PostgreSQL Connection Retry Configuration (Network Robustness)
 ### Number of retry attempts (1-10, default: 3)
@@ -349,7 +406,8 @@ NEO4J_MAX_TRANSACTION_RETRY_TIME=30
 NEO4J_MAX_CONNECTION_LIFETIME=300
 NEO4J_LIVENESS_CHECK_TIMEOUT=30
 NEO4J_KEEP_ALIVE=true
-# NEO4J_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### NEO4J_WORKSPACE=forced_workspace_name
 ### MongoDB Configuration
 MONGO_URI=mongodb://root:root@localhost:27017/
@@ -363,12 +421,14 @@ MILVUS_DB_NAME=lightrag
 # MILVUS_USER=root
 # MILVUS_PASSWORD=your_password
 # MILVUS_TOKEN=your_token
-# MILVUS_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### MILVUS_WORKSPACE=forced_workspace_name
 ### Qdrant
 QDRANT_URL=http://localhost:6333
 # QDRANT_API_KEY=your-api-key
-# QDRANT_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### QDRANT_WORKSPACE=forced_workspace_name
 ### Redis
 REDIS_URI=redis://localhost:6379
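The new VCHORDRQ index type implies an extra PostgreSQL extension on top of pgvector; a quick way to see what is installed before switching POSTGRES_VECTOR_INDEX_TYPE (the extension names below are the usual ones, vector for pgvector and vchord for VectorChord, so verify them against your own setup):

    psql -h localhost -U your_username -d your_database \
      -c "SELECT extname, extversion FROM pg_extension WHERE extname IN ('vector', 'vchord');"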
@@ -376,11 +436,45 @@ REDIS_SOCKET_TIMEOUT=30
 REDIS_CONNECT_TIMEOUT=10
 REDIS_MAX_CONNECTIONS=100
 REDIS_RETRY_ATTEMPTS=3
-# REDIS_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### REDIS_WORKSPACE=forced_workspace_name
 ### Memgraph Configuration
 MEMGRAPH_URI=bolt://localhost:7687
 MEMGRAPH_USERNAME=
 MEMGRAPH_PASSWORD=
 MEMGRAPH_DATABASE=memgraph
-# MEMGRAPH_WORKSPACE=forced_workspace_name
+### DB-specific workspace should not be set; kept for compatibility only
+### MEMGRAPH_WORKSPACE=forced_workspace_name
+
+############################
+### Evaluation Configuration
+############################
+### RAGAS evaluation models (used for RAG quality assessment)
+### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
+### Defaults to OpenAI models for evaluation
+
+### LLM Configuration for Evaluation
+# EVAL_LLM_MODEL=gpt-4o-mini
+### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
+# EVAL_LLM_BINDING_API_KEY=your_api_key
+### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
+# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
+
+### Embedding Configuration for Evaluation
+# EVAL_EMBEDDING_MODEL=text-embedding-3-large
+### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
+# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
+### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
+# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+
+### Performance Tuning
+### Number of concurrent test case evaluations
+### Lower values reduce API rate limit issues but increase evaluation time
+# EVAL_MAX_CONCURRENT=2
+### TOP_K query parameter of LightRAG (default: 10)
+### Number of entities or relations retrieved from KG
+# EVAL_QUERY_TOP_K=10
+### LLM request retry and timeout settings for evaluation
+# EVAL_LLM_MAX_RETRIES=5
+# EVAL_LLM_TIMEOUT=180
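Since the evaluation path requires OpenAI-compatible endpoints, listing models is a quick compatibility check before running it; /models is the standard OpenAI-style route, assuming EVAL_LLM_BINDING_HOST already ends with the API prefix (e.g. /v1):

    curl -s "$EVAL_LLM_BINDING_HOST/models" \
      -H "Authorization: Bearer $EVAL_LLM_BINDING_API_KEY"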