cherry-pick d0ae7a67
parent c9e5988349
commit 2d4c56433e
1 changed file with 47 additions and 150 deletions

env.example (197 changed lines)
@@ -23,13 +23,13 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
# WORKING_DIR=<absolute_path_for_working_dir>

### Tiktoken cache directory (Store cached files in this folder for offline deployment)
# TIKTOKEN_CACHE_DIR=/app/data/tiktoken
# TIKTOKEN_CACHE_DIR=./temp/tiktoken

### Ollama Emulating Model and Tag
# OLLAMA_EMULATING_MODEL_NAME=lightrag
OLLAMA_EMULATING_MODEL_TAG=latest
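### Illustration (not part of the original file): a sketch of querying the emulated Ollama chat API,
### assuming the server listens on the default port 9621, exposes Ollama's /api/chat route, and
### accepts a query-mode prefix such as /hybrid inside the message text:
### curl http://localhost:9621/api/chat -d '{"model": "lightrag:latest", "messages": [{"role": "user", "content": "/hybrid your question here"}]}'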

### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value)
### Max nodes return from graph retrieval in webui
# MAX_GRAPH_NODES=1000

### Logging level

@@ -50,8 +50,6 @@ OLLAMA_EMULATING_MODEL_TAG=latest
# JWT_ALGORITHM=HS256

### API-Key to access LightRAG Server API
### Use this key in HTTP requests with the 'X-API-Key' header
### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
# LIGHTRAG_API_KEY=your-secure-api-key-here
# WHITELIST_PATHS=/health,/api/*
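### Illustration (not part of the original file): with the whitelist above, requests to /health and to
### paths under /api/ are meant to pass without a key, while other endpoints such as /query still
### require the X-API-Key header shown in the example.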

@@ -76,6 +74,11 @@ ENABLE_LLM_CACHE=true
### control the maximum tokens sent to the LLM (includes entities, relations and chunks)
# MAX_TOTAL_TOKENS=30000

### maximum number of related chunks per source entity or relation
### The chunk picker uses this value to determine the total number of chunks selected from the KG (knowledge graph)
### Higher values increase re-ranking time
# RELATED_CHUNK_NUMBER=5
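### Illustration (not part of the original file): a rough sketch only: with RELATED_CHUNK_NUMBER=5
### and roughly 10 entities/relations coming back from the KG, up to about 50 candidate chunks could
### be picked for re-ranking, which is why larger values slow the re-ranking step.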

### chunk selection strategies
### VECTOR: Pick KG chunks by vector similarity, delivering chunks to the LLM that align more closely with naive retrieval
### WEIGHT: Pick KG chunks by entity and chunk weight, delivering more purely KG-related chunks to the LLM

@@ -121,9 +124,6 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
### Document processing output language: English, Chinese, French, German ...
SUMMARY_LANGUAGE=English

### PDF decryption password for protected PDF files
# PDF_DECRYPT_PASSWORD=your_pdf_password_here

### Entity types that the LLM will attempt to recognize
# ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'

@@ -140,22 +140,6 @@ SUMMARY_LANGUAGE=English
### Maximum context size sent to LLM for description summary
# SUMMARY_CONTEXT_SIZE=12000

### control the maximum chunk_ids stored in vector and graph db
# MAX_SOURCE_IDS_PER_ENTITY=300
# MAX_SOURCE_IDS_PER_RELATION=300
### control chunk_ids limitation method: FIFO, KEEP
### FIFO: First in first out
### KEEP: Keep oldest (fewer merge actions and faster)
# SOURCE_IDS_LIMIT_METHOD=FIFO
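### Illustration (not part of the original file): with MAX_SOURCE_IDS_PER_ENTITY=300, once an entity
### already holds 300 chunk_ids, FIFO evicts the oldest id to admit a new one, while KEEP retains the
### existing ids and skips the newcomer, which means fewer merge actions and faster updates.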

# Maximum number of file paths stored in entity/relation file_path field (For display only, does not affect query performance)
# MAX_FILE_PATHS=100

### maximum number of related chunks per source entity or relation
### The chunk picker uses this value to determine the total number of chunks selected from the KG (knowledge graph)
### Higher values increase re-ranking time
# RELATED_CHUNK_NUMBER=5

###############################
### Concurrency Configuration
###############################

@@ -168,13 +152,10 @@ MAX_PARALLEL_INSERT=2
### Num of chunks sent to Embedding in a single request
# EMBEDDING_BATCH_NUM=10
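### Illustration (not part of the original file): as a rough sketch, with MAX_PARALLEL_INSERT=2 and
### EMBEDDING_BATCH_NUM=10, at most two documents are processed concurrently and each embedding
### request carries up to 10 chunks.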

###########################################################################
###########################################################
### LLM Configuration
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
### If LightRAG is deployed in Docker:
### use host.docker.internal instead of localhost in LLM_BINDING_HOST
###########################################################################
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
###########################################################
### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
# LLM_TIMEOUT=180

@@ -183,13 +164,9 @@ LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key

### Azure OpenAI example
### Use deployment name as model name or set AZURE_OPENAI_DEPLOYMENT instead
### Optional for Azure
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# LLM_BINDING=azure_openai
# LLM_BINDING_HOST=https://xxxx.openai.azure.com/
# LLM_BINDING_API_KEY=your_api_key
# LLM_MODEL=my-gpt-mini-deployment
# AZURE_OPENAI_DEPLOYMENT=gpt-4o

### Openrouter example
# LLM_MODEL=google/gemini-2.5-flash

@@ -197,21 +174,18 @@ LLM_BINDING_API_KEY=your_api_key
# LLM_BINDING_API_KEY=your_api_key
# LLM_BINDING=openai

### Gemini example
# LLM_BINDING=gemini
# LLM_MODEL=gemini-flash-latest
# LLM_BINDING_API_KEY=your_gemini_api_key
# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
### OpenAI Compatible API Specific Parameters
### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
# OPENAI_LLM_TEMPERATURE=0.9
### Set the max_tokens to mitigate endless output of some LLMs (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
### Typically, max_tokens does not include prompt content, though some models, such as Gemini models, are exceptions
### For vLLM/SGLang deployed models, or most OpenAI-compatible API providers
# OPENAI_LLM_MAX_TOKENS=9000
### For OpenAI o1-mini or newer models
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

### use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
### lightrag-server --llm-binding gemini --help
### Gemini Specific Parameters
# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
# GEMINI_LLM_TEMPERATURE=0.7
### Enable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
### Disable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
#### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

### use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
### lightrag-server --llm-binding openai --help

@@ -222,16 +196,6 @@ LLM_BINDING_API_KEY=your_api_key
### Qwen3 Specific Parameters deployed by vLLM
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'

### OpenAI Compatible API Specific Parameters
### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
# OPENAI_LLM_TEMPERATURE=0.9
### Set the max_tokens to mitigate endless output of some LLMs (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
### Typically, max_tokens does not include prompt content
### For vLLM/SGLang deployed models, or most OpenAI-compatible API providers
# OPENAI_LLM_MAX_TOKENS=9000
### OpenAI o1-mini or newer models utilize max_completion_tokens instead of max_tokens
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

### use the following command to see all supported options for Ollama LLM
### lightrag-server --llm-binding ollama --help
### Ollama Server Specific Parameters

@@ -245,56 +209,30 @@ OLLAMA_LLM_NUM_CTX=32768
### Bedrock Specific Parameters
# BEDROCK_LLM_TEMPERATURE=1.0

#######################################################################################
####################################################################################
### Embedding Configuration (Should not be changed after the first file is processed)
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
### If LightRAG is deployed in Docker:
### use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
#######################################################################################
####################################################################################
# EMBEDDING_TIMEOUT=30

### Control whether to send embedding_dim parameter to embedding API
### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
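### Illustration (not part of the original file): a sketch of the 'true' case for OpenAI, where the
### dimension below is sent to the API so text-embedding-3-large returns 1024-dimensional vectors;
### the 1024 value is only an example chosen for illustration, not a recommendation.
# EMBEDDING_BINDING=openai
# EMBEDDING_MODEL=text-embedding-3-large
# EMBEDDING_DIM=1024
# EMBEDDING_SEND_DIM=true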

# Ollama embedding
# EMBEDDING_BINDING=ollama
# EMBEDDING_MODEL=bge-m3:latest
# EMBEDDING_DIM=1024
# EMBEDDING_BINDING_API_KEY=your_api_key
### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost
# EMBEDDING_BINDING_HOST=http://localhost:11434

### OpenAI compatible embedding
EMBEDDING_BINDING=openai
EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_DIM=3072
EMBEDDING_SEND_DIM=false
EMBEDDING_TOKEN_LIMIT=8192
EMBEDDING_BINDING_HOST=https://api.openai.com/v1
EMBEDDING_BINDING=ollama
EMBEDDING_MODEL=bge-m3:latest
EMBEDDING_DIM=1024
EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
EMBEDDING_BINDING_HOST=http://localhost:11434

### Optional for Azure embedding
### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
# EMBEDDING_BINDING=azure_openai
# EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
# EMBEDDING_API_KEY=your_api_key
# EMBEDDING_MODEL=my-text-embedding-3-large-deployment
### OpenAI compatible (VoyageAI embedding is OpenAI compatible)
# EMBEDDING_BINDING=openai
# EMBEDDING_MODEL=text-embedding-3-large
# EMBEDDING_DIM=3072

### Gemini embedding
# EMBEDDING_BINDING=gemini
# EMBEDDING_MODEL=gemini-embedding-001
# EMBEDDING_DIM=1536
# EMBEDDING_TOKEN_LIMIT=2048
# EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
# EMBEDDING_BINDING_API_KEY=your_api_key
### Gemini embedding requires sending dimension to server
# EMBEDDING_SEND_DIM=true

### Optional for Azure
# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
# AZURE_EMBEDDING_API_VERSION=2023-05-15
# AZURE_EMBEDDING_ENDPOINT=your_endpoint
# AZURE_EMBEDDING_API_KEY=your_api_key

### Jina AI Embedding
# EMBEDDING_BINDING=jina

@@ -356,18 +294,14 @@ POSTGRES_USER=your_username
POSTGRES_PASSWORD='your_password'
POSTGRES_DATABASE=your_database
POSTGRES_MAX_CONNECTIONS=12
### DB specific workspace should not be set; kept for compatibility only
### POSTGRES_WORKSPACE=forced_workspace_name
# POSTGRES_WORKSPACE=forced_workspace_name

### PostgreSQL Vector Storage Configuration
### Vector storage type: HNSW, IVFFlat, VCHORDRQ
### Vector storage type: HNSW, IVFFlat
POSTGRES_VECTOR_INDEX_TYPE=HNSW
POSTGRES_HNSW_M=16
POSTGRES_HNSW_EF=200
POSTGRES_IVFFLAT_LISTS=100
POSTGRES_VCHORDRQ_BUILD_OPTIONS=
POSTGRES_VCHORDRQ_PROBES=
POSTGRES_VCHORDRQ_EPSILON=1.9

### PostgreSQL Connection Retry Configuration (Network Robustness)
### Number of retry attempts (1-10, default: 3)

@@ -406,8 +340,7 @@ NEO4J_MAX_TRANSACTION_RETRY_TIME=30
NEO4J_MAX_CONNECTION_LIFETIME=300
NEO4J_LIVENESS_CHECK_TIMEOUT=30
NEO4J_KEEP_ALIVE=true
### DB specific workspace should not be set; kept for compatibility only
### NEO4J_WORKSPACE=forced_workspace_name
# NEO4J_WORKSPACE=forced_workspace_name

### MongoDB Configuration
MONGO_URI=mongodb://root:root@localhost:27017/

@@ -421,14 +354,12 @@ MILVUS_DB_NAME=lightrag
# MILVUS_USER=root
# MILVUS_PASSWORD=your_password
# MILVUS_TOKEN=your_token
### DB specific workspace should not be set; kept for compatibility only
### MILVUS_WORKSPACE=forced_workspace_name
# MILVUS_WORKSPACE=forced_workspace_name

### Qdrant
QDRANT_URL=http://localhost:6333
# QDRANT_API_KEY=your-api-key
### DB specific workspace should not be set; kept for compatibility only
### QDRANT_WORKSPACE=forced_workspace_name
# QDRANT_WORKSPACE=forced_workspace_name

### Redis
REDIS_URI=redis://localhost:6379

@@ -436,45 +367,11 @@ REDIS_SOCKET_TIMEOUT=30
REDIS_CONNECT_TIMEOUT=10
REDIS_MAX_CONNECTIONS=100
REDIS_RETRY_ATTEMPTS=3
### DB specific workspace should not be set; kept for compatibility only
### REDIS_WORKSPACE=forced_workspace_name
# REDIS_WORKSPACE=forced_workspace_name

### Memgraph Configuration
MEMGRAPH_URI=bolt://localhost:7687
MEMGRAPH_USERNAME=
MEMGRAPH_PASSWORD=
MEMGRAPH_DATABASE=memgraph
### DB specific workspace should not be set; kept for compatibility only
### MEMGRAPH_WORKSPACE=forced_workspace_name

############################
### Evaluation Configuration
############################
### RAGAS evaluation models (used for RAG quality assessment)
### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
### Default uses OpenAI models for evaluation

### LLM Configuration for Evaluation
# EVAL_LLM_MODEL=gpt-4o-mini
### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
# EVAL_LLM_BINDING_API_KEY=your_api_key
### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1

### Embedding Configuration for Evaluation
# EVAL_EMBEDDING_MODEL=text-embedding-3-large
### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1

### Performance Tuning
### Number of concurrent test case evaluations
### Lower values reduce API rate limit issues but increase evaluation time
# EVAL_MAX_CONCURRENT=2
### TOP_K query parameter of LightRAG (default: 10)
### Number of entities or relations retrieved from KG
# EVAL_QUERY_TOP_K=10
### LLM request retry and timeout settings for evaluation
# EVAL_LLM_MAX_RETRIES=5
# EVAL_LLM_TIMEOUT=180
# MEMGRAPH_WORKSPACE=forced_workspace_name