<!-- .github/pull_request_template.md --> This PR contains the evaluation framework development for cognee ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Expanded evaluation framework now integrates asynchronous corpus building, question answering, and performance evaluation with adaptive benchmarks for improved metrics (correctness, exact match, and F1 score). - **Infrastructure** - Added database integration for persistent storage of questions, answers, and metrics. - Launched an interactive metrics dashboard featuring advanced visualizations. - Introduced an automated testing workflow for continuous quality assurance. - **Documentation** - Updated guidelines for generating concise, clear answers. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
<!-- 41 lines · 1.1 KiB · Python -->
import logging
|
|
import asyncio
|
|
from cognee.shared.utils import setup_logging
|
|
from evals.eval_framework.eval_config import EvalConfig
|
|
|
|
from evals.eval_framework.corpus_builder.run_corpus_builder import run_corpus_builder
|
|
from evals.eval_framework.answer_generation.run_question_answering_module import (
|
|
run_question_answering,
|
|
)
|
|
from evals.eval_framework.evaluation.run_evaluation_module import run_evaluation
|
|
|
|
# Configure logging
|
|
setup_logging(logging.INFO)
|
|
|
|
# Define parameters and file paths.
|
|
eval_params = EvalConfig().to_dict()
|
|
|
|
questions_file = "questions_output.json"
|
|
answers_file = "answers_output.json"
|
|
metrics_file = "metrics_output.json"
|
|
dashboard_path = "dashboard.html"
|
|
|
|
|
|
async def main():
|
|
# Corpus builder
|
|
await run_corpus_builder(eval_params)
|
|
|
|
# Question answering
|
|
await run_question_answering(eval_params)
|
|
|
|
# Metrics calculation + dashboard
|
|
await run_evaluation(eval_params)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
loop = asyncio.new_event_loop()
|
|
asyncio.set_event_loop(loop)
|
|
try:
|
|
loop.run_until_complete(main())
|
|
finally:
|
|
print("Done")
|