fix: Apply ruff formatting and rename test_dataset to sample_dataset

**Lint Fixes (ruff)**:
- Sort imports alphabetically (I001)
- Add blank line after the function-level `import traceback` (ruff format)
- Add trailing comma to dict literals (COM812)
- Reformat writer.writerow for readability (E501)

**Rename test_dataset.json → sample_dataset.json**:
- Avoids .gitignore pattern conflict (test_* is ignored)
- More descriptive name: the file is a sample/template, not actual test data
- Updated all references in eval_rag_quality.py and README.md

Resolves lint-and-format CI check failure.
Addresses reviewer feedback about test dataset naming.
This commit is contained in:
anouarbm 2025-11-02 10:36:03 +01:00
parent aa916f28d2
commit 5cdb4b0ef2
3 changed files with 34 additions and 30 deletions

View file

@ -25,7 +25,7 @@ Instead of requiring human-annotated ground truth, RAGAS uses state-of-the-art e
``` ```
lightrag/evaluation/ lightrag/evaluation/
├── eval_rag_quality.py # Main evaluation script ├── eval_rag_quality.py # Main evaluation script
├── test_dataset.json # Test cases with ground truth ├── sample_dataset.json # Test cases with ground truth
├── __init__.py # Package init ├── __init__.py # Package init
├── results/ # Output directory ├── results/ # Output directory
│ ├── results_YYYYMMDD_HHMMSS.json # Raw metrics │ ├── results_YYYYMMDD_HHMMSS.json # Raw metrics
@ -82,7 +82,7 @@ results/
## 📝 Test Dataset ## 📝 Test Dataset
Edit `test_dataset.json` to add your own test cases: Edit `sample_dataset.json` to add your own test cases:
```json ```json
{ {
@ -268,7 +268,7 @@ for i in range(3):
pip install ragas datasets pip install ragas datasets
``` ```
### "No test_dataset.json found" ### "No sample_dataset.json found"
Make sure you're running from the project root: Make sure you're running from the project root:
@ -297,7 +297,7 @@ Current implementation uses ground truth as mock responses. Results will show pe
## 📝 Next Steps ## 📝 Next Steps
1. ✅ Review test dataset in `test_dataset.json` 1. ✅ Review test dataset in `sample_dataset.json`
2. ✅ Run `python lightrag/evaluation/eval_rag_quality.py` 2. ✅ Run `python lightrag/evaluation/eval_rag_quality.py`
3. ✅ Open the HTML report in browser 3. ✅ Open the HTML report in browser
4. 🔄 Integrate with actual LightRAG system 4. 🔄 Integrate with actual LightRAG system

View file

@ -18,16 +18,17 @@ Results are saved to: lightrag/evaluation/results/
- results_YYYYMMDD_HHMMSS.json (Full results with details) - results_YYYYMMDD_HHMMSS.json (Full results with details)
""" """
import json
import asyncio import asyncio
import time
import csv import csv
from pathlib import Path import json
from datetime import datetime
from typing import Any, Dict, List
import sys
import httpx
import os import os
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
import httpx
from dotenv import load_dotenv from dotenv import load_dotenv
# Add parent directory to path # Add parent directory to path
@ -46,14 +47,14 @@ if "OPENAI_API_KEY" not in os.environ:
os.environ["OPENAI_API_KEY"] = input("Enter your OpenAI API key: ") os.environ["OPENAI_API_KEY"] = input("Enter your OpenAI API key: ")
try: try:
from datasets import Dataset
from ragas import evaluate from ragas import evaluate
from ragas.metrics import ( from ragas.metrics import (
faithfulness,
answer_relevancy, answer_relevancy,
context_recall,
context_precision, context_precision,
context_recall,
faithfulness,
) )
from datasets import Dataset
except ImportError as e: except ImportError as e:
print(f"❌ RAGAS import error: {e}") print(f"❌ RAGAS import error: {e}")
print(" Install with: pip install ragas datasets") print(" Install with: pip install ragas datasets")
@ -73,7 +74,7 @@ class RAGEvaluator:
If None, will try to read from environment or use default If None, will try to read from environment or use default
""" """
if test_dataset_path is None: if test_dataset_path is None:
test_dataset_path = Path(__file__).parent / "test_dataset.json" test_dataset_path = Path(__file__).parent / "sample_dataset.json"
if rag_api_url is None: if rag_api_url is None:
rag_api_url = os.getenv("LIGHTRAG_API_URL", "http://localhost:8000") rag_api_url = os.getenv("LIGHTRAG_API_URL", "http://localhost:8000")
@ -247,6 +248,7 @@ class RAGEvaluator:
except Exception as e: except Exception as e:
import traceback import traceback
print(f" ❌ Error evaluating: {str(e)}") print(f" ❌ Error evaluating: {str(e)}")
print(f" 🔍 Full traceback:\n{traceback.format_exc()}\n") print(f" 🔍 Full traceback:\n{traceback.format_exc()}\n")
result = { result = {
@ -254,7 +256,7 @@ class RAGEvaluator:
"error": str(e), "error": str(e),
"metrics": {}, "metrics": {},
"ragas_score": 0, "ragas_score": 0,
"timestamp": datetime.now().isoformat() "timestamp": datetime.now().isoformat(),
} }
results.append(result) results.append(result)
@ -301,18 +303,20 @@ class RAGEvaluator:
for idx, result in enumerate(results, 1): for idx, result in enumerate(results, 1):
metrics = result.get("metrics", {}) metrics = result.get("metrics", {})
writer.writerow({ writer.writerow(
"test_number": idx, {
"question": result.get("question", ""), "test_number": idx,
"project": result.get("project", "unknown"), "question": result.get("question", ""),
"faithfulness": f"{metrics.get('faithfulness', 0):.4f}", "project": result.get("project", "unknown"),
"answer_relevance": f"{metrics.get('answer_relevance', 0):.4f}", "faithfulness": f"{metrics.get('faithfulness', 0):.4f}",
"context_recall": f"{metrics.get('context_recall', 0):.4f}", "answer_relevance": f"{metrics.get('answer_relevance', 0):.4f}",
"context_precision": f"{metrics.get('context_precision', 0):.4f}", "context_recall": f"{metrics.get('context_recall', 0):.4f}",
"ragas_score": f"{result.get('ragas_score', 0):.4f}", "context_precision": f"{metrics.get('context_precision', 0):.4f}",
"status": "success" if metrics else "error", "ragas_score": f"{result.get('ragas_score', 0):.4f}",
"timestamp": result.get("timestamp", ""), "status": "success" if metrics else "error",
}) "timestamp": result.get("timestamp", ""),
}
)
return csv_path return csv_path
@ -331,7 +335,7 @@ class RAGEvaluator:
"timestamp": datetime.now().isoformat(), "timestamp": datetime.now().isoformat(),
"total_tests": len(results), "total_tests": len(results),
"elapsed_time_seconds": round(elapsed_time, 2), "elapsed_time_seconds": round(elapsed_time, 2),
"results": results "results": results,
} }
# Save JSON results # Save JSON results
@ -380,7 +384,7 @@ async def main():
if rag_api_url: if rag_api_url:
print(f"📡 RAG API URL: {rag_api_url}") print(f"📡 RAG API URL: {rag_api_url}")
else: else:
print(f"📡 RAG API URL: http://localhost:8000 (default)") print("📡 RAG API URL: http://localhost:8000 (default)")
print("="*70 + "\n") print("="*70 + "\n")
evaluator = RAGEvaluator(rag_api_url=rag_api_url) evaluator = RAGEvaluator(rag_api_url=rag_api_url)