<!-- .github/pull_request_template.md -->
## Description

- Compare retrieved context to golden context using deepeval's summarization metric
- Display the fields relevant to each metric on the metrics dashboard

Example output:

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Enhanced context handling in answer generation and corpus building to include extended details.
  - Introduced a new context coverage metric for deeper evaluation insights.
  - Upgraded the evaluation dashboard with dynamic presentation of metric details.
  - Added a new parameter to support loading golden context in corpus loading methods.
- **Bug Fixes**
  - Improved clarity in how answers are structured and appended in the answer generation process.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
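As a rough illustration of the context coverage idea described above, the sketch below scores one retrieved context against its golden context with DeepEval's `SummarizationMetric`. The function name, field roles, and threshold are assumptions for illustration, not the exact wiring in this PR.

```python
# Illustrative sketch only: the golden context is treated as the source text and
# the retrieved context as the "summary" whose coverage is being judged.
# SummarizationMetric needs an LLM backend (e.g. OPENAI_API_KEY) to run.
from deepeval.metrics import SummarizationMetric
from deepeval.test_case import LLMTestCase


def score_context_coverage(golden_context: str, retrieval_context: str) -> float:
    test_case = LLMTestCase(input=golden_context, actual_output=retrieval_context)
    metric = SummarizationMetric(threshold=0.5)  # threshold is an illustrative choice
    metric.measure(test_case)
    return metric.score
```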
`EvaluationExecutor` (Python, 28 lines, 1.2 KiB):
```python
from typing import List, Dict, Any, Union

from cognee.eval_framework.evaluation.evaluator_adapters import EvaluatorAdapter


class EvaluationExecutor:
    def __init__(
        self,
        evaluator_engine: Union[str, EvaluatorAdapter, Any] = "DeepEval",
        evaluate_contexts: bool = False,
    ) -> None:
        # Resolve the evaluator: a name is looked up in the EvaluatorAdapter enum,
        # an enum member is instantiated via its adapter class, and anything else
        # is assumed to already be an adapter instance.
        if isinstance(evaluator_engine, str):
            try:
                adapter_enum = EvaluatorAdapter(evaluator_engine)
            except ValueError:
                raise ValueError(f"Unsupported evaluator: {evaluator_engine}")
            self.eval_adapter = adapter_enum.adapter_class()
        elif isinstance(evaluator_engine, EvaluatorAdapter):
            self.eval_adapter = evaluator_engine.adapter_class()
        else:
            self.eval_adapter = evaluator_engine
        self.evaluate_contexts = evaluate_contexts

    async def execute(self, answers: List[Dict[str, str]], evaluator_metrics: Any) -> Any:
        # When context evaluation is enabled, append the context metrics to the
        # requested metric list before delegating to the adapter.
        if self.evaluate_contexts:
            evaluator_metrics.append("contextual_relevancy")
            evaluator_metrics.append("context_coverage")
        metrics = await self.eval_adapter.evaluate_answers(answers, evaluator_metrics)
        return metrics
```
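A hypothetical driver showing how `EvaluationExecutor` could be invoked end to end; the answer dictionary keys and the `"correctness"` base metric name are placeholders, not taken from this PR.

```python
import asyncio


async def main() -> None:
    executor = EvaluationExecutor(evaluator_engine="DeepEval", evaluate_contexts=True)
    answers = [
        {
            "question": "What does the system do?",  # placeholder record shape
            "answer": "It builds a knowledge graph over the corpus.",
            "golden_answer": "It constructs a knowledge graph from documents.",
            "retrieval_context": "retrieved context goes here",
            "golden_context": "golden context goes here",
        }
    ]
    # With evaluate_contexts=True the executor appends "contextual_relevancy"
    # and "context_coverage" to this list before evaluating.
    metrics = await executor.execute(answers, evaluator_metrics=["correctness"])
    print(metrics)


asyncio.run(main())
```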