diff --git a/env.example b/env.example
index fea99953..4c8d355d 100644
--- a/env.example
+++ b/env.example
@@ -23,13 +23,13 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # WORKING_DIR=

 ### Tiktoken cache directory (Store cached files in this folder for offline deployment)
-# TIKTOKEN_CACHE_DIR=/app/data/tiktoken
+# TIKTOKEN_CACHE_DIR=./temp/tiktoken

 ### Ollama Emulating Model and Tag
 # OLLAMA_EMULATING_MODEL_NAME=lightrag
 OLLAMA_EMULATING_MODEL_TAG=latest

-### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value)
+### Max nodes return from graph retrieval in webui
 # MAX_GRAPH_NODES=1000

 ### Logging level
@@ -50,8 +50,6 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 # JWT_ALGORITHM=HS256

 ### API-Key to access LightRAG Server API
-### Use this key in HTTP requests with the 'X-API-Key' header
-### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
 # LIGHTRAG_API_KEY=your-secure-api-key-here
 # WHITELIST_PATHS=/health,/api/*

@@ -76,6 +74,11 @@ ENABLE_LLM_CACHE=true
 ### control the maximum tokens send to LLM (include entities, relations and chunks)
 # MAX_TOTAL_TOKENS=30000

+### maximum number of related chunks per source entity or relation
+### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
+### Higher values increase re-ranking time
+# RELATED_CHUNK_NUMBER=5
+
 ### chunk selection strategies
 ### VECTOR: Pick KG chunks by vector similarity, delivered chunks to the LLM aligning more closely with naive retrieval
 ### WEIGHT: Pick KG chunks by entity and chunk weight, delivered more solely KG related chunks to the LLM
@@ -121,9 +124,6 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Document processing output language: English, Chinese, French, German ...
 SUMMARY_LANGUAGE=English

-### PDF decryption password for protected PDF files
-# PDF_DECRYPT_PASSWORD=your_pdf_password_here
-
 ### Entity types that the LLM will attempt to recognize
 # ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'

@@ -140,22 +140,6 @@ SUMMARY_LANGUAGE=English
 ### Maximum context size sent to LLM for description summary
 # SUMMARY_CONTEXT_SIZE=12000

-### control the maximum chunk_ids stored in vector and graph db
-# MAX_SOURCE_IDS_PER_ENTITY=300
-# MAX_SOURCE_IDS_PER_RELATION=300
-### control chunk_ids limitation method: FIFO, KEEP
-### FIFO: First in first out
-### KEEP: Keep oldest (less merge action and faster)
-# SOURCE_IDS_LIMIT_METHOD=FIFO
-
-# Maximum number of file paths stored in entity/relation file_path field (For displayed only, does not affect query performance)
-# MAX_FILE_PATHS=100
-
-### maximum number of related chunks per source entity or relation
-### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
-### Higher values increase re-ranking time
-# RELATED_CHUNK_NUMBER=5
-
 ###############################
 ### Concurrency Configuration
 ###############################
@@ -168,13 +152,10 @@ MAX_PARALLEL_INSERT=2
 ### Num of chunks send to Embedding in single request
 # EMBEDDING_BATCH_NUM=10

-###########################################################################
+###########################################################
 ### LLM Configuration
-### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
-### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
-### If LightRAG deployed in Docker:
-### uses host.docker.internal instead of localhost in LLM_BINDING_HOST
-###########################################################################
+### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
+###########################################################

 ### LLM request timeout setting for all llm (0 means no timeout for Ollma)
 # LLM_TIMEOUT=180
@@ -183,13 +164,9 @@ LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key

-### Azure OpenAI example
-### Use deployment name as model name or set AZURE_OPENAI_DEPLOYMENT instead
+### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
-# LLM_BINDING=azure_openai
-# LLM_BINDING_HOST=https://xxxx.openai.azure.com/
-# LLM_BINDING_API_KEY=your_api_key
-# LLM_MODEL=my-gpt-mini-deployment
+# AZURE_OPENAI_DEPLOYMENT=gpt-4o

 ### Openrouter example
 # LLM_MODEL=google/gemini-2.5-flash
@@ -197,21 +174,18 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING=openai

-### Gemini example
-# LLM_BINDING=gemini
-# LLM_MODEL=gemini-flash-latest
-# LLM_BINDING_API_KEY=your_gemini_api_key
-# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
+### OpenAI Compatible API Specific Parameters
+### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
+# OPENAI_LLM_TEMPERATURE=0.9
+### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
+### Typically, max_tokens does not include prompt content, though some models, such as Gemini Models, are exceptions
+### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
+# OPENAI_LLM_MAX_TOKENS=9000
+### For OpenAI o1-mini or newer modles
+OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

-### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
-### lightrag-server --llm-binding gemini --help
-### Gemini Specific Parameters
-# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
-# GEMINI_LLM_TEMPERATURE=0.7
-### Enable Thinking
-# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
-### Disable Thinking
-# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+#### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
+# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

 ### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
 ### lightrag-server --llm-binding openai --help
@@ -222,16 +196,6 @@ LLM_BINDING_API_KEY=your_api_key
 ### Qwen3 Specific Parameters deploy by vLLM
 # OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'

-### OpenAI Compatible API Specific Parameters
-### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
-# OPENAI_LLM_TEMPERATURE=0.9
-### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
-### Typically, max_tokens does not include prompt content
-### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
-# OPENAI_LLM_MAX_TOKENS=9000
-### For OpenAI o1-mini or newer modles utilizes max_completion_tokens instead of max_tokens
-OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
-
 ### use the following command to see all support options for Ollama LLM
 ### lightrag-server --llm-binding ollama --help
 ### Ollama Server Specific Parameters
@@ -245,56 +209,30 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock Specific Parameters
 # BEDROCK_LLM_TEMPERATURE=1.0

-#######################################################################################
+####################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
-### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
-### If LightRAG deployed in Docker:
-### uses host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
-#######################################################################################
+####################################################################################
 # EMBEDDING_TIMEOUT=30
-
-### Control whether to send embedding_dim parameter to embedding API
-### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
-### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
-### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
-### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
-
-# Ollama embedding
-# EMBEDDING_BINDING=ollama
-# EMBEDDING_MODEL=bge-m3:latest
-# EMBEDDING_DIM=1024
-# EMBEDDING_BINDING_API_KEY=your_api_key
-### If LightRAG deployed in Docker uses host.docker.internal instead of localhost
-# EMBEDDING_BINDING_HOST=http://localhost:11434
-
-### OpenAI compatible embedding
-EMBEDDING_BINDING=openai
-EMBEDDING_MODEL=text-embedding-3-large
-EMBEDDING_DIM=3072
-EMBEDDING_SEND_DIM=false
-EMBEDDING_TOKEN_LIMIT=8192
-EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+EMBEDDING_BINDING=ollama
+EMBEDDING_MODEL=bge-m3:latest
+EMBEDDING_DIM=1024
 EMBEDDING_BINDING_API_KEY=your_api_key
+# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
+EMBEDDING_BINDING_HOST=http://localhost:11434

-### Optional for Azure embedding
-### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
-# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
-# EMBEDDING_BINDING=azure_openai
-# EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
-# EMBEDDING_API_KEY=your_api_key
-# EMBEDDING_MODEL==my-text-embedding-3-large-deployment
+### OpenAI compatible (VoyageAI embedding openai compatible)
+# EMBEDDING_BINDING=openai
+# EMBEDDING_MODEL=text-embedding-3-large
 # EMBEDDING_DIM=3072
-
-### Gemini embedding
-# EMBEDDING_BINDING=gemini
-# EMBEDDING_MODEL=gemini-embedding-001
-# EMBEDDING_DIM=1536
-# EMBEDDING_TOKEN_LIMIT=2048
-# EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
+# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
 # EMBEDDING_BINDING_API_KEY=your_api_key
-### Gemini embedding requires sending dimension to server
-# EMBEDDING_SEND_DIM=true
+
+### Optional for Azure
+# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
+# AZURE_EMBEDDING_API_VERSION=2023-05-15
+# AZURE_EMBEDDING_ENDPOINT=your_endpoint
+# AZURE_EMBEDDING_API_KEY=your_api_key

 ### Jina AI Embedding
 # EMBEDDING_BINDING=jina
@@ -356,18 +294,14 @@ POSTGRES_USER=your_username
 POSTGRES_PASSWORD='your_password'
 POSTGRES_DATABASE=your_database
 POSTGRES_MAX_CONNECTIONS=12
-### DB specific workspace should not be set, keep for compatible only
-### POSTGRES_WORKSPACE=forced_workspace_name
+# POSTGRES_WORKSPACE=forced_workspace_name

 ### PostgreSQL Vector Storage Configuration
-### Vector storage type: HNSW, IVFFlat, VCHORDRQ
+### Vector storage type: HNSW, IVFFlat
 POSTGRES_VECTOR_INDEX_TYPE=HNSW
 POSTGRES_HNSW_M=16
 POSTGRES_HNSW_EF=200
 POSTGRES_IVFFLAT_LISTS=100
-POSTGRES_VCHORDRQ_BUILD_OPTIONS=
-POSTGRES_VCHORDRQ_PROBES=
-POSTGRES_VCHORDRQ_EPSILON=1.9

 ### PostgreSQL Connection Retry Configuration (Network Robustness)
 ### Number of retry attempts (1-10, default: 3)
@@ -406,8 +340,7 @@ NEO4J_MAX_TRANSACTION_RETRY_TIME=30
 NEO4J_MAX_CONNECTION_LIFETIME=300
 NEO4J_LIVENESS_CHECK_TIMEOUT=30
 NEO4J_KEEP_ALIVE=true
-### DB specific workspace should not be set, keep for compatible only
-### NEO4J_WORKSPACE=forced_workspace_name
+# NEO4J_WORKSPACE=forced_workspace_name

 ### MongoDB Configuration
 MONGO_URI=mongodb://root:root@localhost:27017/
@@ -421,14 +354,12 @@ MILVUS_DB_NAME=lightrag
 # MILVUS_USER=root
 # MILVUS_PASSWORD=your_password
 # MILVUS_TOKEN=your_token
-### DB specific workspace should not be set, keep for compatible only
-### MILVUS_WORKSPACE=forced_workspace_name
+# MILVUS_WORKSPACE=forced_workspace_name

 ### Qdrant
 QDRANT_URL=http://localhost:6333
 # QDRANT_API_KEY=your-api-key
-### DB specific workspace should not be set, keep for compatible only
-### QDRANT_WORKSPACE=forced_workspace_name
+# QDRANT_WORKSPACE=forced_workspace_name

 ### Redis
 REDIS_URI=redis://localhost:6379
@@ -436,45 +367,11 @@ REDIS_SOCKET_TIMEOUT=30
 REDIS_CONNECT_TIMEOUT=10
 REDIS_MAX_CONNECTIONS=100
 REDIS_RETRY_ATTEMPTS=3
-### DB specific workspace should not be set, keep for compatible only
-### REDIS_WORKSPACE=forced_workspace_name
+# REDIS_WORKSPACE=forced_workspace_name

 ### Memgraph Configuration
 MEMGRAPH_URI=bolt://localhost:7687
 MEMGRAPH_USERNAME=
 MEMGRAPH_PASSWORD=
 MEMGRAPH_DATABASE=memgraph
-### DB specific workspace should not be set, keep for compatible only
-### MEMGRAPH_WORKSPACE=forced_workspace_name
-
-############################
-### Evaluation Configuration
-############################
-### RAGAS evaluation models (used for RAG quality assessment)
-### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
-### Default uses OpenAI models for evaluation
-
-### LLM Configuration for Evaluation
-# EVAL_LLM_MODEL=gpt-4o-mini
-### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
-# EVAL_LLM_BINDING_API_KEY=your_api_key
-### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
-# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
-
-### Embedding Configuration for Evaluation
-# EVAL_EMBEDDING_MODEL=text-embedding-3-large
-### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
-# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
-### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
-# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
-
-### Performance Tuning
-### Number of concurrent test case evaluations
-### Lower values reduce API rate limit issues but increase evaluation time
-# EVAL_MAX_CONCURRENT=2
-### TOP_K query parameter of LightRAG (default: 10)
-### Number of entities or relations retrieved from KG
-# EVAL_QUERY_TOP_K=10
-### LLM request retry and timeout settings for evaluation
-# EVAL_LLM_MAX_RETRIES=5
-# EVAL_LLM_TIMEOUT=180
+# MEMGRAPH_WORKSPACE=forced_workspace_name