<!-- .github/pull_request_template.md -->

This PR contains the evaluation framework development for cognee.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **New Features**
  - Expanded the evaluation framework to integrate asynchronous corpus building, question answering, and performance evaluation with adaptive benchmarks for improved metrics (correctness, exact match, and F1 score).
- **Infrastructure**
  - Added database integration for persistent storage of questions, answers, and metrics.
  - Launched an interactive metrics dashboard featuring advanced visualizations.
  - Introduced an automated testing workflow for continuous quality assurance.
- **Documentation**
  - Updated guidelines for generating concise, clear answers.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
**Python · 15 lines · 628 B**
```python
from typing import Optional

from deepeval.test_case import LLMTestCase


class ExactMatchMetric:
    """Binary metric: scores 1.0 when the normalized answer matches the reference exactly."""

    def __init__(self) -> None:
        self.score: Optional[float] = None
        self.reason: Optional[str] = None

    def measure(self, test_case: LLMTestCase) -> float:
        # Normalize both sides: strip surrounding whitespace and lowercase,
        # treating a missing output as the empty string.
        actual = test_case.actual_output.strip().lower() if test_case.actual_output else ""
        expected = test_case.expected_output.strip().lower() if test_case.expected_output else ""
        self.score = 1.0 if actual == expected else 0.0
        self.reason = "Exact match" if self.score == 1.0 else "Not an exact match"
        return self.score
```
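For context, here is a minimal usage sketch. The `LLMTestCase` keyword arguments (`input`, `actual_output`, `expected_output`) follow deepeval's public API; the example values are invented for illustration:

```python
from deepeval.test_case import LLMTestCase

metric = ExactMatchMetric()
test_case = LLMTestCase(
    input="What is the capital of France?",
    actual_output="Paris",
    expected_output="paris",  # casing differences are normalized away
)
score = metric.measure(test_case)  # 1.0, since both sides normalize to "paris"
print(score, metric.reason)        # 1.0 Exact match
```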
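The release notes also list an F1 metric, whose implementation is not shown in this excerpt. The following is only a sketch of what a token-level F1 in the same style could look like; the class name `F1Metric` and whitespace tokenization are assumptions, not the PR's actual code:

```python
from collections import Counter
from typing import Optional

from deepeval.test_case import LLMTestCase


class F1Metric:
    """Hypothetical token-overlap F1 sketch; not necessarily the PR's implementation."""

    def __init__(self) -> None:
        self.score: Optional[float] = None

    def measure(self, test_case: LLMTestCase) -> float:
        # Assumed normalization: lowercase and split on whitespace.
        pred = (test_case.actual_output or "").strip().lower().split()
        gold = (test_case.expected_output or "").strip().lower().split()
        if not pred or not gold:
            # Both empty counts as a perfect match; one-sided empty scores zero.
            self.score = 1.0 if pred == gold else 0.0
            return self.score
        # Multiset intersection counts each shared token at most min(count) times.
        overlap = sum((Counter(pred) & Counter(gold)).values())
        if overlap == 0:
            self.score = 0.0
            return self.score
        precision = overlap / len(pred)
        recall = overlap / len(gold)
        self.score = 2 * precision * recall / (precision + recall)
        return self.score
```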