fix(evaluation): Move import-time validation to runtime and improve documentation

Changes:
- Move sys.exit() calls from module level to __init__() method
- Raise proper exceptions (ImportError, ValueError, EnvironmentError) instead of sys.exit()
- Add lazy import for RAGEvaluator in __init__.py using __getattr__
- Update README to clarify sample_dataset.json contains generic test data (not personal)
- Fix README to reflect actual output format (JSON + CSV, not HTML)
- Improve documentation for custom test case creation

Addresses code review feedback about import-time validation and module exports.
anouarbm 2025-11-03 05:56:38 +01:00
parent c9e1c6c1c2
commit 36694eb9f2
3 changed files with 85 additions and 53 deletions
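For context, a minimal sketch of what the new contract looks like from the caller's side: configuration problems now surface as exceptions from `RAGEvaluator()` instead of a `sys.exit()` at import time. The environment variable names and the async `run()` call come from the diffs below; the error handling and `main()` wrapper are illustrative assumptions, not code from this commit.

```python
import asyncio
import os

from lightrag.evaluation import RAGEvaluator  # resolved lazily via the module __getattr__


def main() -> None:
    # Env vars read by the new __init__ validation (names taken from the diff);
    # the key value here is a placeholder, not a real credential.
    os.environ.setdefault("LLM_BINDING", "openai")
    os.environ.setdefault("LLM_BINDING_API_KEY", "<your-openai-key>")

    try:
        evaluator = RAGEvaluator()  # validation now happens here, not at import time
    except ImportError as exc:
        print(f"Install evaluation dependencies first: {exc}")
        return
    except (ValueError, EnvironmentError) as exc:
        print(f"Fix LLM binding configuration: {exc}")
        return

    results = asyncio.run(evaluator.run())  # run() is async per the module docstring
    print(results)


if __name__ == "__main__":
    main()
```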

lightrag/evaluation/README.md

@@ -1,6 +1,6 @@
-# 📊 Portfolio RAG Evaluation Framework
+# 📊 LightRAG Evaluation Framework
 
-RAGAS-based offline evaluation of your LightRAG portfolio system.
+RAGAS-based offline evaluation of your LightRAG system.
 
 ## What is RAGAS?
@@ -25,14 +25,16 @@ Instead of requiring human-annotated ground truth, RAGAS uses state-of-the-art e
 ```
 lightrag/evaluation/
 ├── eval_rag_quality.py              # Main evaluation script
-├── sample_dataset.json              # Test cases with ground truth
+├── sample_dataset.json              # Generic LightRAG test cases (not personal data)
 ├── __init__.py                      # Package init
 ├── results/                         # Output directory
-│   ├── results_YYYYMMDD_HHMMSS.json # Raw metrics
-│   └── report_YYYYMMDD_HHMMSS.html  # Beautiful HTML report
+│   ├── results_YYYYMMDD_HHMMSS.json # Raw metrics in JSON
+│   └── results_YYYYMMDD_HHMMSS.csv  # Metrics in CSV format
 └── README.md                        # This file
 ```
+
+**Note:** `sample_dataset.json` contains **generic test questions** about LightRAG features (RAG systems, vector databases, deployment, etc.). This is **not personal portfolio data** - you can use these questions directly to test your own LightRAG installation.
 
 ---
 
 ## 🚀 Quick Start
@@ -68,41 +70,47 @@ Results are saved automatically in `lightrag/evaluation/results/`:
 ```
 results/
-├── results_20241023_143022.json   ← Raw metrics (for analysis)
-└── report_20241023_143022.html    ← Beautiful HTML report 🌟
+├── results_20241023_143022.json   ← Raw metrics in JSON format
+└── results_20241023_143022.csv    ← Metrics in CSV format (for spreadsheets)
 ```
 
-**Open the HTML report in your browser to see:**
+**Results include:**
 - ✅ Overall RAGAS score
-- 📊 Per-metric averages
+- 📊 Per-metric averages (Faithfulness, Answer Relevance, Context Recall, Context Precision)
 - 📋 Individual test case results
-- 📈 Performance breakdown
+- 📈 Performance breakdown by question
 
 ---
 
 ## 📝 Test Dataset
 
-Edit `sample_dataset.json` to add your own test cases:
+The included `sample_dataset.json` contains **generic example questions** about LightRAG (RAG systems, vector databases, deployment, etc.). **This is NOT personal data** - it's meant as a template.
+
+**Important:** You should **replace these with test questions based on YOUR data** that you've injected into your RAG system.
+
+### Creating Your Own Test Cases
+
+Edit `sample_dataset.json` with questions relevant to your indexed documents:
 
 ```json
 {
   "test_cases": [
     {
-      "question": "Your test question here",
-      "ground_truth": "Expected answer with key information",
-      "project_context": "project_name"
+      "question": "Question based on your documents",
+      "ground_truth": "Expected answer from your data",
+      "context": "topic_category"
     }
   ]
 }
 ```
 
-**Example:**
+**Example (for a technical portfolio):**
 
 ```json
 {
   "question": "Which projects use PyTorch?",
   "ground_truth": "The Neural ODE Project uses PyTorch with TorchODE library for continuous-time neural networks.",
-  "project_context": "neural_ode_project"
+  "context": "ml_projects"
 }
 ```
@@ -229,18 +237,21 @@ for i in range(3):
 ---
 
-## 🎯 For Portfolio/Interview
+## 🎯 Using Evaluation Results
 
-**What to Highlight:**
+**What the Metrics Tell You:**
 
-1. ✅ **Quality Metrics**: "RAG system achieves 85% RAGAS score"
-2. ✅ **Evaluation Framework**: "Automated quality assessment with RAGAS"
-3. ✅ **Best Practices**: "Offline evaluation pipeline for continuous improvement"
-4. ✅ **Production-Ready**: "Metrics-driven system optimization"
+1. ✅ **Quality Metrics**: Overall RAGAS score indicates system health
+2. ✅ **Evaluation Framework**: Automated quality assessment with RAGAS
+3. ✅ **Best Practices**: Offline evaluation pipeline for continuous improvement
+4. ✅ **Production-Ready**: Metrics-driven system optimization
 
-**Example Statement:**
+**Example Use Cases:**
 
-> "I built an evaluation framework using RAGAS that measures RAG quality across faithfulness, relevance, and context coverage. The system achieves 85% average RAGAS score, with automated HTML reports for quality tracking."
+- Track RAG quality over time as you update your documents
+- Compare different retrieval modes (local, global, hybrid, mix)
+- Measure impact of chunking strategy changes
+- Validate system performance before deployment
 
 ---
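To make the "track RAG quality over time" use case concrete, here is a rough sketch of aggregating the new CSV output. The file name follows the `results_YYYYMMDD_HHMMSS.csv` pattern documented above, but the column layout of the CSV is an assumption (this commit does not specify it), so the script simply averages whatever numeric columns it finds.

```python
import csv
from collections import defaultdict
from pathlib import Path
from statistics import mean

# Illustrative path following the results_YYYYMMDD_HHMMSS.csv naming scheme above.
results_csv = Path("lightrag/evaluation/results/results_20241023_143022.csv")

metric_values = defaultdict(list)
with results_csv.open(newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        for column, value in row.items():
            try:
                metric_values[column].append(float(value))  # keep numeric columns only
            except (TypeError, ValueError):
                continue  # skip text columns such as the question itself

for column, values in metric_values.items():
    print(f"{column}: {mean(values):.3f}")
```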

lightrag/evaluation/__init__.py

@@ -4,13 +4,22 @@ LightRAG Evaluation Module
 RAGAS-based evaluation framework for assessing RAG system quality.
 
 Usage:
-    from lightrag.evaluation.eval_rag_quality import RAGEvaluator
+    from lightrag.evaluation import RAGEvaluator
 
     evaluator = RAGEvaluator()
     results = await evaluator.run()
 
-Note: RAGEvaluator is imported dynamically to avoid import errors
+Note: RAGEvaluator is imported lazily to avoid import errors
 when ragas/datasets are not installed.
 """
 
 __all__ = ["RAGEvaluator"]
+
+
+def __getattr__(name):
+    """Lazy import to avoid dependency errors when ragas is not installed."""
+    if name == "RAGEvaluator":
+        from .eval_rag_quality import RAGEvaluator
+
+        return RAGEvaluator
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
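A small illustration of what the PEP 562 `__getattr__` hook buys in a fresh interpreter: importing the package no longer touches ragas at all; the heavy dependencies are only loaded when `RAGEvaluator` is first requested (assuming ragas is installed, otherwise the conditional import below simply leaves `RAGAS_AVAILABLE = False`).

```python
import sys

import lightrag.evaluation  # noqa: F401  - package import alone does not load ragas

print("ragas" in sys.modules)  # False: eval_rag_quality has not been imported yet

# Attribute access goes through the module __getattr__, which imports
# eval_rag_quality (and, if installed, ragas/datasets) on first use.
from lightrag.evaluation import RAGEvaluator  # noqa: E402

print("ragas" in sys.modules)  # True, assuming ragas is installed
```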

lightrag/evaluation/eval_rag_quality.py

@@ -40,29 +40,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 project_root = Path(__file__).parent.parent.parent
 load_dotenv(project_root / ".env")
 
-# Setup OpenAI API key (required for RAGAS evaluation)
-# Use LLM_BINDING_API_KEY when running with the OpenAI binding
-llm_binding = os.getenv("LLM_BINDING", "").lower()
-llm_binding_key = os.getenv("LLM_BINDING_API_KEY")
-
-# Validate LLM_BINDING is set to openai
-if llm_binding != "openai":
-    logger.error(
-        "❌ LLM_BINDING must be set to 'openai'. Current value: '%s'",
-        llm_binding or "(not set)",
-    )
-    sys.exit(1)
-
-# Validate LLM_BINDING_API_KEY exists
-if not llm_binding_key:
-    logger.error("❌ LLM_BINDING_API_KEY is not set. Cannot run RAGAS evaluation.")
-    sys.exit(1)
-
-# Set OPENAI_API_KEY from LLM_BINDING_API_KEY
-os.environ["OPENAI_API_KEY"] = llm_binding_key
-logger.info("✅ LLM_BINDING: openai")
-
+# Conditional imports - will raise ImportError if dependencies not installed
 try:
     from datasets import Dataset
     from ragas import evaluate
@@ -72,10 +50,12 @@ try:
         context_recall,
         faithfulness,
     )
-except ImportError as e:
-    logger.error("❌ RAGAS import error: %s", e)
-    logger.error("   Install with: pip install ragas datasets")
-    sys.exit(1)
+
+    RAGAS_AVAILABLE = True
+except ImportError:
+    RAGAS_AVAILABLE = False
+    Dataset = None
+    evaluate = None
 
 CONNECT_TIMEOUT_SECONDS = 180.0
@@ -99,7 +79,39 @@ class RAGEvaluator:
             test_dataset_path: Path to test dataset JSON file
             rag_api_url: Base URL of LightRAG API (e.g., http://localhost:9621)
                 If None, will try to read from environment or use default
+
+        Raises:
+            ImportError: If ragas or datasets packages are not installed
+            ValueError: If LLM_BINDING is not set to 'openai'
+            EnvironmentError: If LLM_BINDING_API_KEY is not set
         """
+        # Validate RAGAS dependencies are installed
+        if not RAGAS_AVAILABLE:
+            raise ImportError(
+                "RAGAS dependencies not installed. "
+                "Install with: pip install ragas datasets"
+            )
+
+        # Validate LLM_BINDING is set to openai (required for RAGAS)
+        llm_binding = os.getenv("LLM_BINDING", "").lower()
+        if llm_binding != "openai":
+            raise ValueError(
+                f"LLM_BINDING must be set to 'openai' for RAGAS evaluation. "
+                f"Current value: '{llm_binding or '(not set)'}'"
+            )
+
+        # Validate LLM_BINDING_API_KEY exists
+        llm_binding_key = os.getenv("LLM_BINDING_API_KEY")
+        if not llm_binding_key:
+            raise EnvironmentError(
+                "LLM_BINDING_API_KEY environment variable is not set. "
+                "This is required for RAGAS evaluation."
+            )
+
+        # Set OPENAI_API_KEY from LLM_BINDING_API_KEY for RAGAS
+        os.environ["OPENAI_API_KEY"] = llm_binding_key
+        logger.info("✅ LLM_BINDING: openai")
+
         if test_dataset_path is None:
             test_dataset_path = Path(__file__).parent / "sample_dataset.json"
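Because the checks now raise instead of exiting the process, they are also easy to exercise in tests. A hypothetical pytest sketch (test names and the use of `monkeypatch` are illustrative, not part of this commit; it assumes ragas and datasets are installed so the `ImportError` branch is not triggered):

```python
import pytest

from lightrag.evaluation import RAGEvaluator


def test_rejects_non_openai_binding(monkeypatch):
    # Any non-openai binding should be refused before anything else happens.
    monkeypatch.setenv("LLM_BINDING", "ollama")
    monkeypatch.setenv("LLM_BINDING_API_KEY", "dummy-key")
    with pytest.raises(ValueError, match="LLM_BINDING must be set to 'openai'"):
        RAGEvaluator()


def test_requires_binding_api_key(monkeypatch):
    # With the key unset, construction should fail with EnvironmentError.
    monkeypatch.setenv("LLM_BINDING", "openai")
    monkeypatch.delenv("LLM_BINDING_API_KEY", raising=False)
    with pytest.raises(EnvironmentError, match="LLM_BINDING_API_KEY"):
        RAGEvaluator()
```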