fix(evaluation): Move import-time validation to runtime and improve documentation

Changes:
- Move sys.exit() calls from module level to __init__() method
- Raise proper exceptions (ImportError, ValueError, EnvironmentError) instead of sys.exit()
- Add lazy import for RAGEvaluator in __init__.py using __getattr__
- Update README to clarify sample_dataset.json contains generic test data (not personal)
- Fix README to reflect actual output format (JSON + CSV, not HTML)
- Improve documentation for custom test case creation

Addresses code review feedback about import-time validation and module exports.
2025-11-03 05:56:38 +01:00 · 2025-11-03 05:56:38 +01:00 · 36694eb9f2
commit 36694eb9f2
parent c9e1c6c1c2
3 changed files with 85 additions and 53 deletions
--- a/lightrag/evaluation/README.md
+++ b/lightrag/evaluation/README.md
@@ -1,6 +1,6 @@
-# 📊 Portfolio RAG Evaluation Framework
+# 📊 LightRAG Evaluation Framework

-RAGAS-based offline evaluation of your LightRAG portfolio system.
+RAGAS-based offline evaluation of your LightRAG system.

 ## What is RAGAS?

@@ -25,14 +25,16 @@ Instead of requiring human-annotated ground truth, RAGAS uses state-of-the-art e
 ```
 lightrag/evaluation/
 ├── eval_rag_quality.py      # Main evaluation script
-├── sample_dataset.json        # Test cases with ground truth
+├── sample_dataset.json        # Generic LightRAG test cases (not personal data)
 ├── __init__.py              # Package init
 ├── results/                 # Output directory
-│   ├── results_YYYYMMDD_HHMMSS.json    # Raw metrics
-│   └── report_YYYYMMDD_HHMMSS.html     # Beautiful HTML report
+│   ├── results_YYYYMMDD_HHMMSS.json    # Raw metrics in JSON
+│   └── results_YYYYMMDD_HHMMSS.csv     # Metrics in CSV format
 └── README.md                # This file
 ```

+**Note:** `sample_dataset.json` contains **generic test questions** about LightRAG features (RAG systems, vector databases, deployment, etc.). This is **not personal portfolio data**. The questions work as-is only if your LightRAG instance has indexed documents covering these topics; otherwise treat them as a template and replace them with questions about your own data.
+
 ---

 ## 🚀 Quick Start
@@ -68,41 +70,47 @@ Results are saved automatically in `lightrag/evaluation/results/`:

 ```
 results/
-├── results_20241023_143022.json     ← Raw metrics (for analysis)
-└── report_20241023_143022.html      ← Beautiful HTML report 🌟
+├── results_20241023_143022.json     ← Raw metrics in JSON format
+└── results_20241023_143022.csv      ← Metrics in CSV format (for spreadsheets)
 ```

-**Open the HTML report in your browser to see:**
+**Results include:**
 - ✅ Overall RAGAS score
-- 📊 Per-metric averages
+- 📊 Per-metric averages (Faithfulness, Answer Relevance, Context Recall, Context Precision)
 - 📋 Individual test case results
-- 📈 Performance breakdown
+- 📈 Performance breakdown by question

 ---

 ## 📝 Test Dataset

-Edit `sample_dataset.json` to add your own test cases:
+The included `sample_dataset.json` contains **generic example questions** about LightRAG (RAG systems, vector databases, deployment, etc.). **This is NOT personal data** - it's meant as a template.
+
+**Important:** You should **replace these with test questions based on YOUR data** that you've injected into your RAG system.
+
+### Creating Your Own Test Cases
+
+Edit `sample_dataset.json` with questions relevant to your indexed documents:

 ```json
 {
  "test_cases": [
    {
-      "question": "Your test question here",
-      "ground_truth": "Expected answer with key information",
-      "project_context": "project_name"
+      "question": "Question based on your documents",
+      "ground_truth": "Expected answer from your data",
+      "context": "topic_category"
    }
  ]
 }
 ```

-**Example:**
+**Example (for a technical portfolio):**

 ```json
 {
  "question": "Which projects use PyTorch?",
  "ground_truth": "The Neural ODE Project uses PyTorch with TorchODE library for continuous-time neural networks.",
-  "project_context": "neural_ode_project"
+  "context": "ml_projects"
 }
 ```

@@ -229,18 +237,21 @@ for i in range(3):

 ---

-## 🎯 For Portfolio/Interview
+## 🎯 Using Evaluation Results

-**What to Highlight:**
+**What the Metrics Tell You:**

-1. ✅ **Quality Metrics**: "RAG system achieves 85% RAGAS score"
-2. ✅ **Evaluation Framework**: "Automated quality assessment with RAGAS"
-3. ✅ **Best Practices**: "Offline evaluation pipeline for continuous improvement"
-4. ✅ **Production-Ready**: "Metrics-driven system optimization"
+1. ✅ **Quality Metrics**: Overall RAGAS score indicates system health
+2. ✅ **Evaluation Framework**: Automated quality assessment with RAGAS
+3. ✅ **Best Practices**: Offline evaluation pipeline for continuous improvement
+4. ✅ **Production-Ready**: Metrics-driven system optimization

-**Example Statement:**
+**Example Use Cases:**

-> "I built an evaluation framework using RAGAS that measures RAG quality across faithfulness, relevance, and context coverage. The system achieves 85% average RAGAS score, with automated HTML reports for quality tracking."
+- Track RAG quality over time as you update your documents
+- Compare different retrieval modes (local, global, hybrid, mix)
+- Measure impact of chunking strategy changes
+- Validate system performance before deployment

 ---

--- a/lightrag/evaluation/__init__.py
+++ b/lightrag/evaluation/__init__.py
@@ -4,13 +4,22 @@ LightRAG Evaluation Module
 RAGAS-based evaluation framework for assessing RAG system quality.

 Usage:
-    from lightrag.evaluation.eval_rag_quality import RAGEvaluator
+    from lightrag.evaluation import RAGEvaluator

    evaluator = RAGEvaluator()
    results = await evaluator.run()

-Note: RAGEvaluator is imported dynamically to avoid import errors
+Note: RAGEvaluator is imported lazily to avoid import errors
 when ragas/datasets are not installed.
 """

 __all__ = ["RAGEvaluator"]
+
+
+def __getattr__(name):
+    """Import and return RAGEvaluator from .eval_rag_quality on attribute access; raise AttributeError for any other name."""
+    if name == "RAGEvaluator":
+        from .eval_rag_quality import RAGEvaluator
+
+        return RAGEvaluator
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
--- a/lightrag/evaluation/eval_rag_quality.py
+++ b/lightrag/evaluation/eval_rag_quality.py
@@ -40,29 +40,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 project_root = Path(__file__).parent.parent.parent
 load_dotenv(project_root / ".env")

-# Setup OpenAI API key (required for RAGAS evaluation)
-# Use LLM_BINDING_API_KEY when running with the OpenAI binding
-
-llm_binding = os.getenv("LLM_BINDING", "").lower()
-llm_binding_key = os.getenv("LLM_BINDING_API_KEY")
-
-# Validate LLM_BINDING is set to openai
-if llm_binding != "openai":
-    logger.error(
-        "❌ LLM_BINDING must be set to 'openai'. Current value: '%s'",
-        llm_binding or "(not set)",
-    )
-    sys.exit(1)
-
-# Validate LLM_BINDING_API_KEY exists
-if not llm_binding_key:
-    logger.error("❌ LLM_BINDING_API_KEY is not set. Cannot run RAGAS evaluation.")
-    sys.exit(1)
-
-# Set OPENAI_API_KEY from LLM_BINDING_API_KEY
-os.environ["OPENAI_API_KEY"] = llm_binding_key
-logger.info("✅ LLM_BINDING: openai")
-
+# Conditional imports - will raise ImportError if dependencies not installed
 try:
    from datasets import Dataset
    from ragas import evaluate
@@ -72,10 +50,12 @@ try:
        context_recall,
        faithfulness,
    )
-except ImportError as e:
-    logger.error("❌ RAGAS import error: %s", e)
-    logger.error("   Install with: pip install ragas datasets")
-    sys.exit(1)
+
+    RAGAS_AVAILABLE = True
+except ImportError:
+    RAGAS_AVAILABLE = False
+    Dataset = None
+    evaluate = None


 CONNECT_TIMEOUT_SECONDS = 180.0
@@ -99,7 +79,39 @@ class RAGEvaluator:
            test_dataset_path: Path to test dataset JSON file
            rag_api_url: Base URL of LightRAG API (e.g., http://localhost:9621)
                        If None, will try to read from environment or use default
+
+        Raises:
+            ImportError: If ragas or datasets packages are not installed
+            ValueError: If LLM_BINDING is not set to 'openai'
+            EnvironmentError: If LLM_BINDING_API_KEY is not set
        """
+        # Validate RAGAS dependencies are installed
+        if not RAGAS_AVAILABLE:
+            raise ImportError(
+                "RAGAS dependencies not installed. "
+                "Install with: pip install ragas datasets"
+            )
+
+        # Validate LLM_BINDING is set to openai (required for RAGAS)
+        llm_binding = os.getenv("LLM_BINDING", "").lower()
+        if llm_binding != "openai":
+            raise ValueError(
+                f"LLM_BINDING must be set to 'openai' for RAGAS evaluation. "
+                f"Current value: '{llm_binding or '(not set)'}'"
+            )
+
+        # Validate LLM_BINDING_API_KEY exists
+        llm_binding_key = os.getenv("LLM_BINDING_API_KEY")
+        if not llm_binding_key:
+            raise EnvironmentError(
+                "LLM_BINDING_API_KEY environment variable is not set. "
+                "This is required for RAGAS evaluation."
+            )
+
+        # Set OPENAI_API_KEY from LLM_BINDING_API_KEY for RAGAS
+        os.environ["OPENAI_API_KEY"] = llm_binding_key
+        logger.info("✅ LLM_BINDING: openai")
+
        if test_dataset_path is None:
            test_dataset_path = Path(__file__).parent / "sample_dataset.json"