<!-- .github/pull_request_template.md -->

This PR adds the evaluation framework for cognee.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **New Features**
  - Expanded the evaluation framework to integrate asynchronous corpus building, question answering, and performance evaluation with adaptive benchmarks for improved metrics (correctness, exact match, and F1 score).
- **Infrastructure**
  - Added database integration for persistent storage of questions, answers, and metrics.
  - Launched an interactive metrics dashboard featuring advanced visualizations.
  - Introduced an automated testing workflow for continuous quality assurance.
- **Documentation**
  - Updated guidelines for generating concise, clear answers.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
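For context: the attached `EvaluationExecutor` snippet below imports an `EvaluatorAdapter` from `evals.eval_framework.evaluation.evaluator_adapters`, which is not included in this excerpt. Judging from how it is used (constructed from a string value and exposing an `adapter_class` attribute), it is plausibly an enum along these lines. This is a minimal sketch for readability only; the member names and the `DeepEvalAdapter` class are assumptions, not the actual cognee source:

```python
from enum import Enum


class DeepEvalAdapter:
    """Hypothetical adapter; a real one would call into the DeepEval library."""

    async def evaluate_answers(self, answers, evaluator_metrics):
        # Placeholder: score each answer against the requested metrics.
        return {}


class EvaluatorAdapter(Enum):
    # Each member carries its string value and the adapter class it maps to,
    # so EvaluatorAdapter("DeepEval") resolves by value and
    # member.adapter_class() builds the concrete adapter.
    DEEPEVAL = ("DeepEval", DeepEvalAdapter)

    def __new__(cls, value, adapter_class):
        obj = object.__new__(cls)
        obj._value_ = value
        obj.adapter_class = adapter_class
        return obj
```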
The attached snippet (Python, 20 lines, 939 B):
```python
from typing import List, Dict, Any, Union

from evals.eval_framework.evaluation.evaluator_adapters import EvaluatorAdapter


class EvaluationExecutor:
    def __init__(self, evaluator_engine: Union[str, EvaluatorAdapter, Any] = "DeepEval") -> None:
        if isinstance(evaluator_engine, str):
            # Resolve the engine name to a known adapter via the enum.
            try:
                adapter_enum = EvaluatorAdapter(evaluator_engine)
            except ValueError:
                raise ValueError(f"Unsupported evaluator: {evaluator_engine}")
            self.eval_adapter = adapter_enum.adapter_class()
        elif isinstance(evaluator_engine, EvaluatorAdapter):
            # An enum member was passed directly; instantiate its adapter class.
            self.eval_adapter = evaluator_engine.adapter_class()
        else:
            # Otherwise assume a ready-made adapter instance was passed in.
            self.eval_adapter = evaluator_engine

    async def execute(self, answers: List[Dict[str, str]], evaluator_metrics: Any) -> Any:
        # Delegate scoring of the collected answers to the configured adapter.
        metrics = await self.eval_adapter.evaluate_answers(answers, evaluator_metrics)
        return metrics
```
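For reference, a hedged usage sketch of the executor. The answer-dict keys and metric names below are illustrative assumptions based on the release notes above (correctness, exact match, F1), not the framework's confirmed API:

```python
import asyncio


async def main() -> None:
    # Hypothetical answer records; the exact keys are an assumption.
    answers = [
        {
            "question": "What does cognee's evaluation framework measure?",
            "answer": "Correctness, exact match, and F1 score.",
            "golden_answer": "Correctness, EM, and F1.",
        }
    ]
    executor = EvaluationExecutor(evaluator_engine="DeepEval")
    # Metric names are illustrative, mirroring the release notes.
    metrics = await executor.execute(answers, evaluator_metrics=["correctness", "EM", "f1"])
    print(metrics)


if __name__ == "__main__":
    asyncio.run(main())
```

Because `execute` is a coroutine, it composes naturally with the asynchronous corpus-building and question-answering stages mentioned in the summary.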