diff --git a/env.example b/env.example
index cf9d472a..43bc759b 100644
--- a/env.example
+++ b/env.example
@@ -399,14 +399,24 @@ MEMGRAPH_DATABASE=memgraph
 ### Evaluation Configuration
 ############################
 ### RAGAS evaluation models (used for RAG quality assessment)
+### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
 ### Default uses OpenAI models for evaluation
+
+### LLM Configuration for Evaluation
 # EVAL_LLM_MODEL=gpt-4o-mini
-# EVAL_EMBEDDING_MODEL=text-embedding-3-large
-### API key for evaluation (fallback to OPENAI_API_KEY if not set)
+### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
 # EVAL_LLM_BINDING_API_KEY=your_api_key
-### Custom endpoint for evaluation models (optional, for OpenAI-compatible services)
+### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
 # EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
 
+### Embedding Configuration for Evaluation
+# EVAL_EMBEDDING_MODEL=text-embedding-3-large
+### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
+# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
+### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
+# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+
+### Performance Tuning
 ### Number of concurrent test case evaluations
 ### Lower values reduce API rate limit issues but increase evaluation time
 # EVAL_MAX_CONCURRENT=2
@@ -415,4 +425,4 @@
 # EVAL_QUERY_TOP_K=10
 ### LLM request retry and timeout settings for evaluation
 # EVAL_LLM_MAX_RETRIES=5
-# EVAL_LLM_TIMEOUT=120
+# EVAL_LLM_TIMEOUT=180
diff --git a/lightrag/evaluation/README.md b/lightrag/evaluation/README.md
index 8848f29d..de1845ef 100644
--- a/lightrag/evaluation/README.md
+++ b/lightrag/evaluation/README.md
@@ -147,12 +147,22 @@ python lightrag/evaluation/eval_rag_quality.py --help
 
 The evaluation framework supports customization through environment variables:
 
+**⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible**
+- The RAGAS framework requires OpenAI-compatible API interfaces
+- Custom endpoints must implement the OpenAI API format (e.g., vLLM, SGLang, LocalAI)
+- Non-compatible endpoints will cause evaluation failures
+
 | Variable | Default | Description |
 |----------|---------|-------------|
+| **LLM Configuration** | | |
 | `EVAL_LLM_MODEL` | `gpt-4o-mini` | LLM model used for RAGAS evaluation |
+| `EVAL_LLM_BINDING_API_KEY` | falls back to `OPENAI_API_KEY` | API key for LLM evaluation |
+| `EVAL_LLM_BINDING_HOST` | (optional) | Custom OpenAI-compatible endpoint URL for LLM |
+| **Embedding Configuration** | | |
 | `EVAL_EMBEDDING_MODEL` | `text-embedding-3-large` | Embedding model for evaluation |
-| `EVAL_LLM_BINDING_API_KEY` | falls back to `OPENAI_API_KEY` | API key for evaluation models |
-| `EVAL_LLM_BINDING_HOST` | (optional) | Custom endpoint URL for OpenAI-compatible services |
+| `EVAL_EMBEDDING_BINDING_API_KEY` | falls back to `EVAL_LLM_BINDING_API_KEY` → `OPENAI_API_KEY` | API key for embeddings |
+| `EVAL_EMBEDDING_BINDING_HOST` | falls back to `EVAL_LLM_BINDING_HOST` | Custom OpenAI-compatible endpoint URL for embeddings |
+| **Performance Tuning** | | |
 | `EVAL_MAX_CONCURRENT` | 2 | Number of concurrent test case evaluations (1=serial) |
 | `EVAL_QUERY_TOP_K` | 10 | Number of documents to retrieve per query |
 | `EVAL_LLM_MAX_RETRIES` | 5 | Maximum LLM request retries |
@@ -160,13 +170,14 @@ The evaluation framework supports customization through environment variables:
 
 ### Usage Examples
 
-**Default Configuration (OpenAI):**
+**Example 1: Default Configuration (OpenAI Official API)**
 ```bash
 export OPENAI_API_KEY=sk-xxx
 python lightrag/evaluation/eval_rag_quality.py
 ```
+Both the LLM and embeddings use OpenAI's official API with the default models.
 
-**Custom Model:**
+**Example 2: Custom Models on OpenAI**
 ```bash
 export OPENAI_API_KEY=sk-xxx
 export EVAL_LLM_MODEL=gpt-4o-mini
@@ -174,11 +185,60 @@ export EVAL_EMBEDDING_MODEL=text-embedding-3-large
 python lightrag/evaluation/eval_rag_quality.py
 ```
 
-**OpenAI-Compatible Endpoint:**
+**Example 3: Same Custom OpenAI-Compatible Endpoint for Both**
 ```bash
+# Both LLM and embeddings use the same custom endpoint
 export EVAL_LLM_BINDING_API_KEY=your-custom-key
-export EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
+export EVAL_LLM_BINDING_HOST=http://localhost:8000/v1
 export EVAL_LLM_MODEL=qwen-plus
+export EVAL_EMBEDDING_MODEL=BAAI/bge-m3
+python lightrag/evaluation/eval_rag_quality.py
+```
+Embeddings automatically inherit the LLM endpoint configuration.
+
+**Example 4: Separate Endpoints (Cost Optimization)**
+```bash
+# Use OpenAI for LLM (high quality)
+export EVAL_LLM_BINDING_API_KEY=sk-openai-key
+export EVAL_LLM_MODEL=gpt-4o-mini
+# No EVAL_LLM_BINDING_HOST means use OpenAI official API
+
+# Use local vLLM for embeddings (cost-effective)
+export EVAL_EMBEDDING_BINDING_API_KEY=local-key
+export EVAL_EMBEDDING_BINDING_HOST=http://localhost:8001/v1
+export EVAL_EMBEDDING_MODEL=BAAI/bge-m3
+
+python lightrag/evaluation/eval_rag_quality.py
+```
+The LLM uses OpenAI's official API, while embeddings use a local custom endpoint.
+
+**Example 5: Different Custom Endpoints for LLM and Embeddings**
+```bash
+# LLM on one OpenAI-compatible server
+export EVAL_LLM_BINDING_API_KEY=key1
+export EVAL_LLM_BINDING_HOST=http://llm-server:8000/v1
+export EVAL_LLM_MODEL=custom-llm
+
+# Embeddings on another OpenAI-compatible server
+export EVAL_EMBEDDING_BINDING_API_KEY=key2
+export EVAL_EMBEDDING_BINDING_HOST=http://embedding-server:8001/v1
+export EVAL_EMBEDDING_MODEL=custom-embedding
+
+python lightrag/evaluation/eval_rag_quality.py
+```
+The LLM and embeddings each use a different custom OpenAI-compatible endpoint.
+
+**Example 6: Using Environment Variables from .env File**
+```bash
+# Create .env file in project root
+cat > .env << EOF
+EVAL_LLM_BINDING_API_KEY=your-key
+EVAL_LLM_BINDING_HOST=http://localhost:8000/v1
+EVAL_LLM_MODEL=qwen-plus
+EVAL_EMBEDDING_MODEL=BAAI/bge-m3
+EOF
+
+# Run evaluation (automatically loads .env)
 python lightrag/evaluation/eval_rag_quality.py
 ```
 
diff --git a/lightrag/evaluation/eval_rag_quality.py b/lightrag/evaluation/eval_rag_quality.py
index dfc89019..7b415090 100644
--- a/lightrag/evaluation/eval_rag_quality.py
+++ b/lightrag/evaluation/eval_rag_quality.py
@@ -127,8 +127,10 @@ class RAGEvaluator:
         Environment Variables:
             EVAL_LLM_MODEL: LLM model for evaluation (default: gpt-4o-mini)
             EVAL_EMBEDDING_MODEL: Embedding model for evaluation (default: text-embedding-3-small)
-            EVAL_LLM_BINDING_API_KEY: API key for evaluation models (fallback to OPENAI_API_KEY)
-            EVAL_LLM_BINDING_HOST: Custom endpoint URL for evaluation models (optional)
+            EVAL_LLM_BINDING_API_KEY: API key for LLM (fallback to OPENAI_API_KEY)
+            EVAL_LLM_BINDING_HOST: Custom endpoint URL for LLM (optional)
+            EVAL_EMBEDDING_BINDING_API_KEY: API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
+            EVAL_EMBEDDING_BINDING_HOST: Custom endpoint URL for embeddings (fallback: EVAL_LLM_BINDING_HOST)
 
         Raises:
             ImportError: If ragas or datasets packages are not installed
@@ -141,11 +143,11 @@ class RAGEvaluator:
                 "Install with: pip install ragas datasets"
             )
 
-        # Configure evaluation models (for RAGAS scoring)
-        eval_api_key = os.getenv("EVAL_LLM_BINDING_API_KEY") or os.getenv(
+        # Configure evaluation LLM (for RAGAS scoring)
+        eval_llm_api_key = os.getenv("EVAL_LLM_BINDING_API_KEY") or os.getenv(
             "OPENAI_API_KEY"
         )
-        if not eval_api_key:
+        if not eval_llm_api_key:
             raise EnvironmentError(
                 "EVAL_LLM_BINDING_API_KEY or OPENAI_API_KEY is required for evaluation. "
" "Set EVAL_LLM_BINDING_API_KEY to use a custom API key, " @@ -153,23 +155,40 @@ class RAGEvaluator: ) eval_model = os.getenv("EVAL_LLM_MODEL", "gpt-4o-mini") + eval_llm_base_url = os.getenv("EVAL_LLM_BINDING_HOST") + + # Configure evaluation embeddings (for RAGAS scoring) + # Fallback chain: EVAL_EMBEDDING_BINDING_API_KEY -> EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY + eval_embedding_api_key = ( + os.getenv("EVAL_EMBEDDING_BINDING_API_KEY") + or os.getenv("EVAL_LLM_BINDING_API_KEY") + or os.getenv("OPENAI_API_KEY") + ) eval_embedding_model = os.getenv( "EVAL_EMBEDDING_MODEL", "text-embedding-3-large" ) - eval_base_url = os.getenv("EVAL_LLM_BINDING_HOST") + # Fallback chain: EVAL_EMBEDDING_BINDING_HOST -> EVAL_LLM_BINDING_HOST -> None + eval_embedding_base_url = os.getenv("EVAL_EMBEDDING_BINDING_HOST") or os.getenv( + "EVAL_LLM_BINDING_HOST" + ) # Create LLM and Embeddings instances for RAGAS llm_kwargs = { "model": eval_model, - "api_key": eval_api_key, + "api_key": eval_llm_api_key, "max_retries": int(os.getenv("EVAL_LLM_MAX_RETRIES", "5")), "request_timeout": int(os.getenv("EVAL_LLM_TIMEOUT", "180")), } - embedding_kwargs = {"model": eval_embedding_model, "api_key": eval_api_key} + embedding_kwargs = { + "model": eval_embedding_model, + "api_key": eval_embedding_api_key, + } - if eval_base_url: - llm_kwargs["base_url"] = eval_base_url - embedding_kwargs["base_url"] = eval_base_url + if eval_llm_base_url: + llm_kwargs["base_url"] = eval_llm_base_url + + if eval_embedding_base_url: + embedding_kwargs["base_url"] = eval_embedding_base_url # Create base LangChain LLM base_llm = ChatOpenAI(**llm_kwargs) @@ -209,7 +228,8 @@ class RAGEvaluator: # Store configuration values for display self.eval_model = eval_model self.eval_embedding_model = eval_embedding_model - self.eval_base_url = eval_base_url + self.eval_llm_base_url = eval_llm_base_url + self.eval_embedding_base_url = eval_embedding_base_url self.eval_max_retries = llm_kwargs["max_retries"] self.eval_timeout = llm_kwargs["request_timeout"] @@ -221,13 +241,29 @@ class RAGEvaluator: logger.info("Evaluation Models:") logger.info(" • LLM Model: %s", self.eval_model) logger.info(" • Embedding Model: %s", self.eval_embedding_model) - if self.eval_base_url: - logger.info(" • Custom Endpoint: %s", self.eval_base_url) + + # Display LLM endpoint + if self.eval_llm_base_url: + logger.info(" • LLM Endpoint: %s", self.eval_llm_base_url) logger.info( - " • Bypass N-Parameter: Enabled (use LangchainLLMWrapperfor compatibility)" + " • Bypass N-Parameter: Enabled (use LangchainLLMWrapper for compatibility)" ) else: - logger.info(" • Endpoint: OpenAI Official API") + logger.info(" • LLM Endpoint: OpenAI Official API") + + # Display Embedding endpoint (only if different from LLM) + if self.eval_embedding_base_url: + if self.eval_embedding_base_url != self.eval_llm_base_url: + logger.info( + " • Embedding Endpoint: %s", self.eval_embedding_base_url + ) + # If same as LLM endpoint, no need to display separately + elif not self.eval_llm_base_url: + # Both using OpenAI - already displayed above + pass + else: + # LLM uses custom endpoint, but embeddings use OpenAI + logger.info(" • Embedding Endpoint: OpenAI Official API") logger.info("Concurrency & Rate Limiting:") query_top_k = int(os.getenv("EVAL_QUERY_TOP_K", "10"))