cherry-pick c358f405
parent 730c4030f7
commit aba1c65d7f
3 changed files with 62 additions and 120 deletions
env.example (157 changed lines)
@@ -29,7 +29,7 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
# OLLAMA_EMULATING_MODEL_NAME=lightrag
OLLAMA_EMULATING_MODEL_TAG=latest

### Max nodes for graph retrieval (ensure the WebUI local setting is also updated; it is limited to this value)
### Max nodes returned from graph retrieval in the WebUI
# MAX_GRAPH_NODES=1000

### Logging level
@@ -170,10 +170,8 @@ MAX_PARALLEL_INSERT=2

###########################################################################
### LLM Configuration
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM services
### If LightRAG is deployed in Docker:
### use host.docker.internal instead of localhost in LLM_BINDING_HOST
###########################################################################
### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
# LLM_TIMEOUT=180
@@ -183,13 +181,9 @@ LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key

### Azure OpenAI example
### Use deployment name as model name, or set AZURE_OPENAI_DEPLOYMENT instead
### Optional for Azure
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# LLM_BINDING=azure_openai
# LLM_BINDING_HOST=https://xxxx.openai.azure.com/
# LLM_BINDING_API_KEY=your_api_key
# LLM_MODEL=gpt-5-mini
# AZURE_OPENAI_DEPLOYMENT=gpt-4o

### OpenRouter example
# LLM_MODEL=google/gemini-2.5-flash
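As a rough illustration of how the Azure variables above fit together, here is a minimal sketch using the standard `openai` Python SDK (>= 1.x). The client calls are generic SDK usage, not LightRAG's own binding code, and the deployment fallback logic is an assumption.

```python
# Illustrative only: wires the AZURE_* / LLM_* variables above into the
# standard `openai` SDK (>= 1.x). Not LightRAG's own binding code.
import os
from openai import AzureOpenAI

client = AzureOpenAI(
    api_key=os.getenv("LLM_BINDING_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
    azure_endpoint=os.getenv("LLM_BINDING_HOST", "https://xxxx.openai.azure.com/"),
)

# Azure routes requests by deployment name, so AZURE_OPENAI_DEPLOYMENT
# (or the model name itself) is passed as the `model` argument.
deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT") or os.getenv("LLM_MODEL", "gpt-4o")
response = client.chat.completions.create(
    model=deployment,
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)
```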
@@ -197,21 +191,18 @@ LLM_BINDING_API_KEY=your_api_key
# LLM_BINDING_API_KEY=your_api_key
# LLM_BINDING=openai

### Gemini example
# LLM_BINDING=gemini
# LLM_MODEL=gemini-flash-latest
# LLM_BINDING_API_KEY=your_gemini_api_key
# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
### OpenAI Compatible API Specific Parameters
### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B
# OPENAI_LLM_TEMPERATURE=0.9
### Set max_tokens to mitigate endless output from some LLMs (less than LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
### Typically, max_tokens does not include prompt content, though some models, such as Gemini models, are exceptions
### For vLLM/SGLang deployed models, or most OpenAI compatible API providers
# OPENAI_LLM_MAX_TOKENS=9000
### For OpenAI o1-mini or newer models
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

### use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
### lightrag-server --llm-binding gemini --help
### Gemini Specific Parameters
# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
# GEMINI_LLM_TEMPERATURE=0.7
### Enable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
### Disable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
#### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

### use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
### lightrag-server --llm-binding openai --help
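To make the token-budget comment concrete, here is a small sketch of the arithmetic (9000 = 180 s * 50 tokens/s). The 50 tokens/s rate is the example value from the comment above, not a measured figure.

```python
# Sanity-check sketch for the token budget described above: keep
# OPENAI_LLM_MAX_TOKENS below LLM_TIMEOUT * expected output speed.
import os

llm_timeout_s = int(os.getenv("LLM_TIMEOUT", "180"))
assumed_tokens_per_s = 50  # example rate from the env comment, not a measurement

budget = llm_timeout_s * assumed_tokens_per_s  # 180 s * 50 tokens/s = 9000
max_tokens = int(os.getenv("OPENAI_LLM_MAX_TOKENS", "9000"))

# LLM_TIMEOUT=0 means "no timeout", in which case there is nothing to check.
if llm_timeout_s > 0 and max_tokens > budget:
    print(
        f"OPENAI_LLM_MAX_TOKENS={max_tokens} may not fit within LLM_TIMEOUT: "
        f"only ~{budget} tokens can be emitted at {assumed_tokens_per_s} tokens/s"
    )
```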
@@ -222,17 +213,8 @@ LLM_BINDING_API_KEY=your_api_key
### Qwen3 Specific Parameters (deployed by vLLM)
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'

### OpenAI Compatible API Specific Parameters
### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B
# OPENAI_LLM_TEMPERATURE=0.9
### Set max_tokens to mitigate endless output from some LLMs (less than LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
### Typically, max_tokens does not include prompt content
### For vLLM/SGLang deployed models, or most OpenAI compatible API providers
# OPENAI_LLM_MAX_TOKENS=9000
### OpenAI o1-mini and newer models use max_completion_tokens instead of max_tokens
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

### use the following command to see all supported options for the Ollama LLM binding
### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost in LLM_BINDING_HOST
### lightrag-server --llm-binding ollama --help
### Ollama Server Specific Parameters
### OLLAMA_LLM_NUM_CTX must be provided, and should be at least MAX_TOTAL_TOKENS + 2000
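A minimal sketch of the OLLAMA_LLM_NUM_CTX constraint stated above. The MAX_TOTAL_TOKENS default used here is only a placeholder for the example, not a documented default.

```python
# Minimal sketch of the constraint above: OLLAMA_LLM_NUM_CTX should be at
# least MAX_TOTAL_TOKENS + 2000. Purely illustrative, not LightRAG code.
import os

num_ctx = int(os.getenv("OLLAMA_LLM_NUM_CTX", "32768"))
max_total_tokens = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))  # placeholder default

if num_ctx < max_total_tokens + 2000:
    raise ValueError(
        f"OLLAMA_LLM_NUM_CTX={num_ctx} is too small; "
        f"expected at least MAX_TOTAL_TOKENS + 2000 = {max_total_tokens + 2000}"
    )
```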
@@ -249,52 +231,27 @@ OLLAMA_LLM_NUM_CTX=32768
### Embedding Configuration (should not be changed after the first file is processed)
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding services
### If LightRAG is deployed in Docker:
### use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
#######################################################################################
# EMBEDDING_TIMEOUT=30

### Control whether to send the embedding_dim parameter to the embedding API
### IMPORTANT: Jina ALWAYS sends the dimension parameter (API requirement) - this setting is ignored for Jina
### For OpenAI: set to 'true' to enable dynamic dimension adjustment
### For OpenAI: set to 'false' (default) to disable sending the dimension parameter
### Note: Automatically ignored for backends that don't support the dimension parameter (e.g., Ollama)

# Ollama embedding
# EMBEDDING_BINDING=ollama
# EMBEDDING_MODEL=bge-m3:latest
# EMBEDDING_DIM=1024
# EMBEDDING_BINDING_API_KEY=your_api_key
### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost
# EMBEDDING_BINDING_HOST=http://localhost:11434

### OpenAI compatible embedding
EMBEDDING_BINDING=openai
EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_DIM=3072
EMBEDDING_SEND_DIM=false
EMBEDDING_TOKEN_LIMIT=8192
EMBEDDING_BINDING_HOST=https://api.openai.com/v1
EMBEDDING_BINDING=ollama
EMBEDDING_MODEL=bge-m3:latest
EMBEDDING_DIM=1024
EMBEDDING_BINDING_API_KEY=your_api_key
# If LightRAG is deployed in Docker, use host.docker.internal instead of localhost
EMBEDDING_BINDING_HOST=http://localhost:11434

### Optional for Azure embedding
### Use deployment name as model name, or set AZURE_EMBEDDING_DEPLOYMENT instead
# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
# EMBEDDING_BINDING=azure_openai
# EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
# AZURE_EMBEDDING_API_KEY=your_api_key
# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
### OpenAI compatible (VoyageAI embedding, OpenAI compatible)
# EMBEDDING_BINDING=openai
# EMBEDDING_MODEL=text-embedding-3-large
# EMBEDDING_DIM=3072

### Gemini embedding
# EMBEDDING_BINDING=gemini
# EMBEDDING_MODEL=gemini-embedding-001
# EMBEDDING_DIM=1536
# EMBEDDING_TOKEN_LIMIT=2048
# EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
# EMBEDDING_BINDING_API_KEY=your_api_key
### Gemini embedding requires sending the dimension to the server
# EMBEDDING_SEND_DIM=true

### Optional for Azure
# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
# AZURE_EMBEDDING_API_VERSION=2023-05-15
# AZURE_EMBEDDING_ENDPOINT=your_endpoint
# AZURE_EMBEDDING_API_KEY=your_api_key

### Jina AI Embedding
# EMBEDDING_BINDING=jina
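The EMBEDDING_SEND_DIM behaviour described above can be sketched for an OpenAI-compatible endpoint as follows. This is illustrative client code using the `openai` SDK, not LightRAG's embedding binding, and Jina/Ollama handling is omitted.

```python
# Sketch of EMBEDDING_SEND_DIM for an OpenAI-compatible endpoint: the
# `dimensions` parameter is only attached when the flag is true.
import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("EMBEDDING_BINDING_API_KEY"),
    base_url=os.getenv("EMBEDDING_BINDING_HOST", "https://api.openai.com/v1"),
)

kwargs = {}
if os.getenv("EMBEDDING_SEND_DIM", "false").lower() == "true":
    # Only sent when explicitly enabled; some backends reject or ignore it.
    kwargs["dimensions"] = int(os.getenv("EMBEDDING_DIM", "3072"))

resp = client.embeddings.create(
    model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"),
    input=["hello world"],
    **kwargs,
)
print(len(resp.data[0].embedding))
```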
@@ -356,18 +313,14 @@ POSTGRES_USER=your_username
POSTGRES_PASSWORD='your_password'
POSTGRES_DATABASE=your_database
POSTGRES_MAX_CONNECTIONS=12
### DB specific workspace should not be set; kept for compatibility only
### POSTGRES_WORKSPACE=forced_workspace_name
# POSTGRES_WORKSPACE=forced_workspace_name

### PostgreSQL Vector Storage Configuration
### Vector storage type: HNSW, IVFFlat, VCHORDRQ
### Vector storage type: HNSW, IVFFlat
POSTGRES_VECTOR_INDEX_TYPE=HNSW
POSTGRES_HNSW_M=16
POSTGRES_HNSW_EF=200
POSTGRES_IVFFLAT_LISTS=100
POSTGRES_VCHORDRQ_BUILD_OPTIONS=
POSTGRES_VCHORDRQ_PROBES=
POSTGRES_VCHORDRQ_EPSILON=1.9

### PostgreSQL Connection Retry Configuration (Network Robustness)
### Number of retry attempts (1-10, default: 3)
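A hedged sketch of how the vector-index settings above might translate into pgvector DDL, assuming POSTGRES_HNSW_M / POSTGRES_HNSW_EF map to pgvector's m / ef_construction and POSTGRES_IVFFLAT_LISTS to lists. The table and column names are placeholders; the real mapping lives in LightRAG's PostgreSQL storage code.

```python
# Illustrative mapping from the POSTGRES_* settings above to pgvector index DDL.
# Table/column names ("chunks", "embedding") are placeholders; verify the actual
# option mapping against LightRAG's PostgreSQL storage implementation.
import os

index_type = os.getenv("POSTGRES_VECTOR_INDEX_TYPE", "HNSW").upper()

if index_type == "HNSW":
    ddl = (
        "CREATE INDEX ON chunks USING hnsw (embedding vector_cosine_ops) "
        f"WITH (m = {os.getenv('POSTGRES_HNSW_M', '16')}, "
        f"ef_construction = {os.getenv('POSTGRES_HNSW_EF', '200')});"
    )
elif index_type == "IVFFLAT":
    ddl = (
        "CREATE INDEX ON chunks USING ivfflat (embedding vector_cosine_ops) "
        f"WITH (lists = {os.getenv('POSTGRES_IVFFLAT_LISTS', '100')});"
    )
else:
    ddl = "-- VCHORDRQ build options are backend specific; see POSTGRES_VCHORDRQ_*"

print(ddl)
```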
@@ -406,8 +359,7 @@ NEO4J_MAX_TRANSACTION_RETRY_TIME=30
NEO4J_MAX_CONNECTION_LIFETIME=300
NEO4J_LIVENESS_CHECK_TIMEOUT=30
NEO4J_KEEP_ALIVE=true
### DB specific workspace should not be set; kept for compatibility only
### NEO4J_WORKSPACE=forced_workspace_name
# NEO4J_WORKSPACE=forced_workspace_name

### MongoDB Configuration
MONGO_URI=mongodb://root:root@localhost:27017/
@@ -421,14 +373,12 @@ MILVUS_DB_NAME=lightrag
# MILVUS_USER=root
# MILVUS_PASSWORD=your_password
# MILVUS_TOKEN=your_token
### DB specific workspace should not be set; kept for compatibility only
### MILVUS_WORKSPACE=forced_workspace_name
# MILVUS_WORKSPACE=forced_workspace_name

### Qdrant
QDRANT_URL=http://localhost:6333
# QDRANT_API_KEY=your-api-key
### DB specific workspace should not be set; kept for compatibility only
### QDRANT_WORKSPACE=forced_workspace_name
# QDRANT_WORKSPACE=forced_workspace_name

### Redis
REDIS_URI=redis://localhost:6379
@@ -436,50 +386,27 @@ REDIS_SOCKET_TIMEOUT=30
REDIS_CONNECT_TIMEOUT=10
REDIS_MAX_CONNECTIONS=100
REDIS_RETRY_ATTEMPTS=3
### DB specific workspace should not be set; kept for compatibility only
### REDIS_WORKSPACE=forced_workspace_name
# REDIS_WORKSPACE=forced_workspace_name

### Memgraph Configuration
MEMGRAPH_URI=bolt://localhost:7687
MEMGRAPH_USERNAME=
MEMGRAPH_PASSWORD=
MEMGRAPH_DATABASE=memgraph
### DB specific workspace should not be set; kept for compatibility only
### MEMGRAPH_WORKSPACE=forced_workspace_name

###########################################################
### Langfuse Observability Configuration
### Only works with LLMs served through an OpenAI compatible API
### Install with: pip install lightrag-hku[observability]
### Sign up at: https://cloud.langfuse.com or self-host
###########################################################
# LANGFUSE_SECRET_KEY=""
# LANGFUSE_PUBLIC_KEY=""
# LANGFUSE_HOST="https://cloud.langfuse.com" # or your self-hosted instance address
# LANGFUSE_ENABLE_TRACE=true
# MEMGRAPH_WORKSPACE=forced_workspace_name

############################
### Evaluation Configuration
############################
### RAGAS evaluation models (used for RAG quality assessment)
### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
### Default uses OpenAI models for evaluation

### LLM Configuration for Evaluation
# EVAL_LLM_MODEL=gpt-4o-mini
### API key for LLM evaluation (falls back to OPENAI_API_KEY if not set)
# EVAL_EMBEDDING_MODEL=text-embedding-3-large
### API key for evaluation (falls back to OPENAI_API_KEY if not set)
# EVAL_LLM_BINDING_API_KEY=your_api_key
### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
### Custom endpoint for evaluation models (optional, for OpenAI-compatible services)
# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1

### Embedding Configuration for Evaluation
# EVAL_EMBEDDING_MODEL=text-embedding-3-large
### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1

### Performance Tuning
### Number of concurrent test case evaluations
### Lower values reduce API rate limit issues but increase evaluation time
# EVAL_MAX_CONCURRENT=2
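The key and endpoint fallbacks documented above amount to a simple resolution chain. The helper names below are hypothetical and only restate the documented order.

```python
# Sketch of the evaluation credential fallbacks documented above:
# embeddings fall back EVAL_EMBEDDING_BINDING_API_KEY -> EVAL_LLM_BINDING_API_KEY
# -> OPENAI_API_KEY, and the LLM key falls back to OPENAI_API_KEY.
import os

def eval_llm_api_key() -> str | None:
    return os.getenv("EVAL_LLM_BINDING_API_KEY") or os.getenv("OPENAI_API_KEY")

def eval_embedding_api_key() -> str | None:
    return (
        os.getenv("EVAL_EMBEDDING_BINDING_API_KEY")
        or os.getenv("EVAL_LLM_BINDING_API_KEY")
        or os.getenv("OPENAI_API_KEY")
    )

def eval_embedding_host() -> str:
    # The embedding endpoint falls back to the LLM evaluation host, then to OpenAI.
    return (
        os.getenv("EVAL_EMBEDDING_BINDING_HOST")
        or os.getenv("EVAL_LLM_BINDING_HOST")
        or "https://api.openai.com/v1"
    )
```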
@@ -488,4 +415,4 @@ MEMGRAPH_DATABASE=memgraph
# EVAL_QUERY_TOP_K=10
### LLM request retry and timeout settings for evaluation
# EVAL_LLM_MAX_RETRIES=5
# EVAL_LLM_TIMEOUT=180
# EVAL_LLM_TIMEOUT=120

@@ -481,7 +481,7 @@ class RAGEvaluator:
            List of evaluation results with metrics
        """
        # Get evaluation concurrency from the environment (default: 2)
        max_async = int(os.getenv("EVAL_MAX_CONCURRENT", "3"))
        max_async = int(os.getenv("EVAL_MAX_CONCURRENT", "2"))

        logger.info("%s", "=" * 70)
        logger.info("🚀 Starting RAGAS Evaluation of LightRAG System")

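EVAL_MAX_CONCURRENT caps how many test cases are evaluated at once. A minimal sketch of how such a cap is commonly enforced with asyncio is shown below; `evaluate_case` is a placeholder coroutine, not the actual RAGEvaluator method.

```python
# Minimal sketch: bounding concurrent evaluations with asyncio.Semaphore.
# `evaluate_case` is a placeholder coroutine, not the real RAGEvaluator method.
import asyncio
import os

async def evaluate_case(case: dict) -> dict:
    await asyncio.sleep(0.1)          # stand-in for one RAGAS evaluation
    return {"question": case["question"], "score": 1.0}

async def run_all(cases: list[dict]) -> list[dict]:
    max_async = int(os.getenv("EVAL_MAX_CONCURRENT", "2"))
    sem = asyncio.Semaphore(max_async)

    async def bounded(case: dict) -> dict:
        async with sem:               # at most max_async evaluations in flight
            return await evaluate_case(case)

    return await asyncio.gather(*(bounded(c) for c in cases))

# Example: asyncio.run(run_all([{"question": "..."}]))
```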
@@ -3,17 +3,32 @@
  {
    "question": "How does LightRAG solve the hallucination problem in large language models?",
    "ground_truth": "LightRAG solves the hallucination problem by combining large language models with external knowledge retrieval. The framework ensures accurate responses by grounding LLM outputs in actual documents. LightRAG provides contextual responses that reduce hallucinations significantly.",
    "project": "lightrag_overview"
    "project": "lightrag_evaluation_sample"
  },
  {
    "question": "What are the three main components required in a RAG system?",
    "ground_truth": "A RAG system requires three main components: a retrieval system (vector database or search engine) to find relevant documents, an embedding model to convert text into vector representations for similarity search, and a large language model (LLM) to generate responses based on retrieved context.",
    "project": "rag_architecture"
    "project": "lightrag_evaluation_sample"
  },
  {
    "question": "How does LightRAG's retrieval performance compare to traditional RAG approaches?",
    "ground_truth": "LightRAG delivers faster retrieval performance than traditional RAG approaches. The framework optimizes document retrieval operations for speed, while traditional RAG systems often suffer from slow query response times. LightRAG achieves high quality results with improved performance.",
    "project": "lightrag_improvements"
    "ground_truth": "LightRAG delivers faster retrieval performance than traditional RAG approaches. The framework optimizes document retrieval operations for speed. Traditional RAG systems often suffer from slow query response times. LightRAG achieves high quality results with improved performance. The framework combines speed with accuracy in retrieval operations, prioritizing ease of use without sacrificing quality.",
    "project": "lightrag_evaluation_sample"
  },
  {
    "question": "What vector databases does LightRAG support and what are their key characteristics?",
    "ground_truth": "LightRAG supports multiple vector databases including ChromaDB for simple deployment and efficient similarity search, Neo4j for graph-based knowledge representation with vector capabilities, Milvus for high-performance vector search at scale, Qdrant for fast similarity search with filtering and production-ready infrastructure, MongoDB Atlas for combined document storage and vector search, Redis for in-memory low-latency vector search, and a built-in nano-vectordb that eliminates external dependencies for small projects. This multi-database support enables developers to choose appropriate backends based on scale, performance, and infrastructure requirements.",
    "project": "lightrag_evaluation_sample"
  },
  {
    "question": "What are the four key metrics for evaluating RAG system quality and what does each metric measure?",
    "ground_truth": "RAG system quality is measured through four key metrics: Faithfulness measures whether answers are factually grounded in retrieved context and detects hallucinations. Answer Relevance measures how well answers address the user question and evaluates response appropriateness. Context Recall measures completeness of retrieval and whether all relevant information was retrieved from documents. Context Precision measures quality and relevance of retrieved documents without noise or irrelevant content.",
    "project": "lightrag_evaluation_sample"
  },
  {
    "question": "What are the core benefits of LightRAG and how does it improve upon traditional RAG systems?",
    "ground_truth": "LightRAG offers five core benefits: accuracy through document-grounded responses, up-to-date information without model retraining, domain expertise through specialized document collections, cost-effectiveness by avoiding expensive fine-tuning, and transparency by showing source documents. Compared to traditional RAG systems, LightRAG provides a simpler API with intuitive interfaces, faster retrieval performance with optimized operations, better integration with multiple vector database backends for flexible selection, and optimized prompting strategies with refined templates. LightRAG prioritizes ease of use while maintaining quality and combines speed with accuracy.",
    "project": "lightrag_evaluation_sample"
  }
]
}
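The sample dataset above is a list of question / ground_truth / project records. A small loading sketch follows; the file path and the top-level "test_cases" key are assumptions, since the dataset's filename is not visible in this diff.

```python
# Quick sketch for loading the sample evaluation dataset shown above.
import json

# Placeholder path and top-level key; adjust to the actual dataset file.
with open("eval_dataset.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

# Each entry carries "question", "ground_truth" and "project" fields.
for case in dataset.get("test_cases", []):
    print(f'[{case["project"]}] {case["question"]}')
```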