fix: Use actual retrieved contexts for RAGAS evaluation
**Critical Fix: Contexts vs Ground Truth**

RAGAS metrics now evaluate actual retrieval performance.

- Previously: used `ground_truth` as `contexts` (always perfect scores)
- Now: uses retrieved documents from the LightRAG API (real evaluation)

**Changes to `generate_rag_response` (lines 100-156)**:

- Remove the unused `context` parameter
- Change the return type: `Dict[str, str]` → `Dict[str, Any]`
- Extract contexts as a list of strings from `references[].text`
- Return a `contexts` key instead of `context` (JSON dump)
- Add `response.raise_for_status()` for better error handling
- Add an `httpx.HTTPStatusError` exception handler

**Changes to `evaluate_responses` (lines 180-191)**:

- Line 183: extract `retrieved_contexts` from `rag_response`
- Line 190: use `[retrieved_contexts]` instead of `[[ground_truth]]`
- Now correctly evaluates retrieval quality, not ground-truth quality

**Impact on RAGAS Metrics**:

- Context Precision: now ranks actual retrieved docs by relevance
- Context Recall: compares `ground_truth` against the actual retrieval
- Faithfulness: verifies the answer against the actual retrieved contexts
- Answer Relevance: unchanged (question-answer relevance)

Fixes incorrect evaluation methodology. Based on the RAGAS documentation:

- `contexts` = documents retrieved by the RAG system
- `ground_truth` = reference answer for the context_recall metric

References:

- https://docs.ragas.io/en/stable/concepts/components/eval_dataset/
- https://docs.ragas.io/en/stable/concepts/metrics/
parent b12b693a81 · commit 026bca00d9 · 1 changed file with 24 additions and 25 deletions
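For reference, here is a minimal sketch of how the corrected dataset feeds RAGAS, using the v0.1-style `evaluate` API and the `question`/`answer`/`contexts`/`ground_truth` schema this repo already uses. The sample question, answer, and contexts are invented, and actually running it requires an LLM configured for RAGAS (e.g. an OpenAI key):

```python
# Minimal sketch, assuming the RAGAS v0.1-style API; all sample data is invented.
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)

# What generate_rag_response now returns: the documents LightRAG retrieved.
retrieved_contexts = [
    "LightRAG's mix mode combines local and global retrieval...",  # invented
    "The /query endpoint accepts mode, top_k, and include_references...",
]

eval_dataset = Dataset.from_dict(
    {
        "question": ["What does LightRAG's mix mode do?"],      # invented
        "answer": ["It combines local and global retrieval."],  # invented
        # The fix: contexts are the retrieved documents, not the reference answer.
        "contexts": [retrieved_contexts],
        "ground_truth": ["Mix mode combines local and global retrieval."],
    }
)

# With real retrieved contexts, context_precision/context_recall/faithfulness
# measure the retriever; before this fix they scored ground_truth against itself.
scores = evaluate(
    eval_dataset,
    metrics=[context_precision, context_recall, faithfulness, answer_relevancy],
)
print(scores)
```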
```diff
@@ -100,52 +100,46 @@ class RAGEvaluator:
     async def generate_rag_response(
         self,
         question: str,
-        context: str = None,  # Not used - actual context comes from LightRAG
-    ) -> Dict[str, str]:
+    ) -> Dict[str, Any]:
         """
-        Generate RAG response by calling LightRAG API
-
-        Calls the actual LightRAG /query endpoint instead of using mock data.
+        Generate RAG response by calling LightRAG API.
 
         Args:
-            question: The user query
-            context: Ignored (for compatibility), actual context from LightRAG
+            question: The user query.
 
         Returns:
-            Dict with 'answer' and 'context' keys
+            Dictionary with 'answer' and 'contexts' keys.
+            'contexts' is a list of strings (one per retrieved document).
 
         Raises:
-            Exception: If LightRAG API is unavailable
+            Exception: If LightRAG API is unavailable.
         """
         try:
             async with httpx.AsyncClient(timeout=60.0) as client:
-                # Prepare request to LightRAG API
                 payload = {
                     "query": question,
-                    "mode": "mix",  # Recommended: combines local & global
+                    "mode": "mix",
                     "include_references": True,
                     "response_type": "Multiple Paragraphs",
                     "top_k": 10,
                 }
 
-                # Call LightRAG /query endpoint
                 response = await client.post(
                     f"{self.rag_api_url}/query",
                     json=payload,
                 )
-
-                if response.status_code != 200:
-                    raise Exception(
-                        f"LightRAG API error {response.status_code}: {response.text}"
-                    )
+                response.raise_for_status()  # Better error handling
 
                 result = response.json()
 
+                # Extract text content from each reference document
+                references = result.get("references", [])
+                contexts = [
+                    ref.get("text", "") for ref in references if ref.get("text")
+                ]
+
                 return {
                     "answer": result.get("response", "No response generated"),
-                    "context": json.dumps(result.get("references", []))
-                    if result.get("references")
-                    else "",
+                    "contexts": contexts,  # List of strings, not JSON dump
                 }
 
         except httpx.ConnectError:
@@ -154,6 +148,10 @@ class RAGEvaluator:
                 f" Make sure LightRAG server is running:\n"
                 f" python -m lightrag.api.lightrag_server"
             )
+        except httpx.HTTPStatusError as e:
+            raise Exception(
+                f"LightRAG API error {e.response.status_code}: {e.response.text}"
+            )
         except Exception as e:
             raise Exception(f"Error calling LightRAG API: {str(e)}")
@@ -179,14 +177,15 @@ class RAGEvaluator:
         # Generate RAG response by calling actual LightRAG API
         rag_response = await self.generate_rag_response(question=question)
 
-        # Prepare dataset for RAGAS evaluation
+        # *** CRITICAL FIX: Use actual retrieved contexts, NOT ground_truth ***
+        retrieved_contexts = rag_response["contexts"]
+
+        # Prepare dataset for RAGAS evaluation with CORRECT contexts
         eval_dataset = Dataset.from_dict(
             {
                 "question": [question],
                 "answer": [rag_response["answer"]],
-                "contexts": [
-                    [ground_truth]
-                ],  # RAGAS expects list of context strings
+                "contexts": [retrieved_contexts],
                 "ground_truth": [ground_truth],
             }
        )
```
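A quick smoke test for the fixed method against a running LightRAG server; this sketch assumes `RAGEvaluator` can be constructed without arguments, which this diff does not show:

```python
# Hypothetical smoke test; RAGEvaluator's constructor is assumed to be no-arg.
import asyncio

async def main() -> None:
    evaluator = RAGEvaluator()
    result = await evaluator.generate_rag_response(
        question="What does LightRAG's mix mode do?"  # invented question
    )
    print(result["answer"][:200])
    # 'contexts' is now a list of document strings, not a JSON dump.
    print(f"retrieved {len(result['contexts'])} contexts")

asyncio.run(main())
```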