fix: Apply ruff formatting and rename test_dataset to sample_dataset

**Lint Fixes (ruff)**:
- Sort imports alphabetically (I001)
- Add blank line after import traceback (E302)
- Add trailing comma to dict literals (COM812)
- Reformat writer.writerow for readability (E501)

**Rename test_dataset.json → sample_dataset.json**:
- Avoids .gitignore pattern conflict (test_* is ignored; see the sketch below)
- More descriptive name - it's a sample/template, not actual test data
- Updated all references in eval_rag_quality.py and README.md
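
For illustration, a minimal sketch of the ignore-pattern clash mentioned above, assuming a test_* glob in .gitignore (the actual pattern is not shown in this diff):

```python
# Hypothetical illustration only: mimics an assumed "test_*" .gitignore glob
# with fnmatch to show why the old filename was ignored and the new one is not.
from fnmatch import fnmatch

IGNORE_PATTERN = "test_*"  # assumed; the real .gitignore entry may differ

for name in ("test_dataset.json", "sample_dataset.json"):
    status = "ignored" if fnmatch(name, IGNORE_PATTERN) else "tracked"
    print(f"{name}: {status}")
# test_dataset.json: ignored
# sample_dataset.json: tracked
```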

Resolves lint-and-format CI check failure.
Addresses reviewer feedback about test dataset naming.
Author: anouarbm, 2025-11-02 10:36:03 +01:00
Parent: aa916f28d2
Commit: 5cdb4b0ef2
3 changed files with 34 additions and 30 deletions

README.md

@@ -25,7 +25,7 @@ Instead of requiring human-annotated ground truth, RAGAS uses state-of-the-art e
```
lightrag/evaluation/
├── eval_rag_quality.py # Main evaluation script
├── test_dataset.json # Test cases with ground truth
├── sample_dataset.json # Test cases with ground truth
├── __init__.py # Package init
├── results/ # Output directory
│ ├── results_YYYYMMDD_HHMMSS.json # Raw metrics
@@ -82,7 +82,7 @@ results/
## 📝 Test Dataset
Edit `test_dataset.json` to add your own test cases:
Edit `sample_dataset.json` to add your own test cases:
```json
{
@@ -268,7 +268,7 @@ for i in range(3):
pip install ragas datasets
```
### "No test_dataset.json found"
### "No sample_dataset.json found"
Make sure you're running from the project root:
@@ -297,7 +297,7 @@ Current implementation uses ground truth as mock responses. Results will show pe
## 📝 Next Steps
1. ✅ Review test dataset in `test_dataset.json`
1. ✅ Review test dataset in `sample_dataset.json`
2. ✅ Run `python lightrag/evaluation/eval_rag_quality.py`
3. ✅ Open the HTML report in browser
4. 🔄 Integrate with actual LightRAG system

lightrag/evaluation/eval_rag_quality.py

@@ -18,16 +18,17 @@ Results are saved to: lightrag/evaluation/results/
- results_YYYYMMDD_HHMMSS.json (Full results with details)
"""
import json
import asyncio
import time
import csv
from pathlib import Path
from datetime import datetime
from typing import Any, Dict, List
import sys
import httpx
import json
import os
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
import httpx
from dotenv import load_dotenv
# Add parent directory to path
@@ -46,14 +47,14 @@ if "OPENAI_API_KEY" not in os.environ:
os.environ["OPENAI_API_KEY"] = input("Enter your OpenAI API key: ")
try:
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
faithfulness,
answer_relevancy,
context_recall,
context_precision,
context_recall,
faithfulness,
)
from datasets import Dataset
except ImportError as e:
print(f"❌ RAGAS import error: {e}")
print(" Install with: pip install ragas datasets")
@@ -73,7 +74,7 @@ class RAGEvaluator:
If None, will try to read from environment or use default
"""
if test_dataset_path is None:
test_dataset_path = Path(__file__).parent / "test_dataset.json"
test_dataset_path = Path(__file__).parent / "sample_dataset.json"
if rag_api_url is None:
rag_api_url = os.getenv("LIGHTRAG_API_URL", "http://localhost:8000")
@@ -247,6 +248,7 @@ class RAGEvaluator:
except Exception as e:
import traceback
print(f" ❌ Error evaluating: {str(e)}")
print(f" 🔍 Full traceback:\n{traceback.format_exc()}\n")
result = {
@@ -254,7 +256,7 @@
"error": str(e),
"metrics": {},
"ragas_score": 0,
"timestamp": datetime.now().isoformat()
"timestamp": datetime.now().isoformat(),
}
results.append(result)
@@ -301,18 +303,20 @@ class RAGEvaluator:
for idx, result in enumerate(results, 1):
metrics = result.get("metrics", {})
writer.writerow({
"test_number": idx,
"question": result.get("question", ""),
"project": result.get("project", "unknown"),
"faithfulness": f"{metrics.get('faithfulness', 0):.4f}",
"answer_relevance": f"{metrics.get('answer_relevance', 0):.4f}",
"context_recall": f"{metrics.get('context_recall', 0):.4f}",
"context_precision": f"{metrics.get('context_precision', 0):.4f}",
"ragas_score": f"{result.get('ragas_score', 0):.4f}",
"status": "success" if metrics else "error",
"timestamp": result.get("timestamp", ""),
})
writer.writerow(
{
"test_number": idx,
"question": result.get("question", ""),
"project": result.get("project", "unknown"),
"faithfulness": f"{metrics.get('faithfulness', 0):.4f}",
"answer_relevance": f"{metrics.get('answer_relevance', 0):.4f}",
"context_recall": f"{metrics.get('context_recall', 0):.4f}",
"context_precision": f"{metrics.get('context_precision', 0):.4f}",
"ragas_score": f"{result.get('ragas_score', 0):.4f}",
"status": "success" if metrics else "error",
"timestamp": result.get("timestamp", ""),
}
)
return csv_path
@@ -331,7 +335,7 @@ class RAGEvaluator:
"timestamp": datetime.now().isoformat(),
"total_tests": len(results),
"elapsed_time_seconds": round(elapsed_time, 2),
"results": results
"results": results,
}
# Save JSON results
@@ -380,7 +384,7 @@ async def main():
if rag_api_url:
print(f"📡 RAG API URL: {rag_api_url}")
else:
print(f"📡 RAG API URL: http://localhost:8000 (default)")
print("📡 RAG API URL: http://localhost:8000 (default)")
print("="*70 + "\n")
evaluator = RAGEvaluator(rag_api_url=rag_api_url)