cognee/evals/eval_framework/benchmark_adapters/twowikimultihop_adapter.py
hajdul88 6a0c0e3ef8
feat: Cognee evaluation framework development (#498)
<!-- .github/pull_request_template.md -->

This PR contains the evaluation framework development for cognee

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
  - Expanded evaluation framework now integrates asynchronous corpus
    building, question answering, and performance evaluation with adaptive
    benchmarks for improved metrics (correctness, exact match, and F1
    score).

- **Infrastructure**
  - Added database integration for persistent storage of questions,
    answers, and metrics.
  - Launched an interactive metrics dashboard featuring advanced
    visualizations.
  - Introduced an automated testing workflow for continuous quality
    assurance.

- **Documentation**
  - Updated guidelines for generating concise, clear answers.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2025-02-11 16:31:54 +01:00

49 lines
1.7 KiB
Python

import requests
import os
import json
import random
from typing import Optional, Union, Any, LiteralString
from evals.eval_framework.benchmark_adapters.base_benchmark_adapter import BaseBenchmarkAdapter
class TwoWikiMultihopAdapter(BaseBenchmarkAdapter):
    """Benchmark adapter that loads the 2WikiMultihopQA ``dev.json`` file.

    The dataset is read from a local JSON cache when the cache file exists
    (the file name is relative, so it is resolved against the current
    working directory); otherwise it is downloaded from
    ``dataset_info["URL"]`` and written to that cache file for later runs.
    """

    # Local cache file name and the remote location it is fetched from.
    dataset_info = {
        "filename": "2wikimultihop_dev.json",
        "URL": "https://huggingface.co/datasets/voidful/2WikiMultihopQA/resolve/main/dev.json",
    }

    # Seconds `requests` may wait on the server (connect / between bytes)
    # before giving up. Without a timeout a stalled connection would block
    # the evaluation run forever.
    _download_timeout_seconds = 60

    def load_corpus(
        self, limit: Optional[int] = None, seed: int = 42
    ) -> tuple[list[Union[LiteralString, str]], list[dict[str, Any]]]:
        """Load the dataset and split it into a corpus and QA pairs.

        Args:
            limit: When given and strictly between 0 and the number of
                dataset items, only ``limit`` randomly sampled items are
                used. ``None``, non-positive values, and values greater
                than or equal to the dataset size select every item.
            seed: Seed for the sampling, so the same ``limit`` and ``seed``
                always select the same items.

        Returns:
            A ``(corpus_list, question_answer_pairs)`` tuple.
            ``corpus_list`` holds one string per context entry, built by
            joining that entry's sentences with single spaces.
            ``question_answer_pairs`` holds one dict per dataset item with
            the keys ``"question"``, ``"answer"`` (lower-cased) and
            ``"type"``.

        Raises:
            requests.HTTPError: If the download answers with an error status.
            requests.Timeout: If the server does not respond in time.
        """
        filename = self.dataset_info["filename"]

        if os.path.exists(filename):
            with open(filename, "r", encoding="utf-8") as f:
                corpus_json = json.load(f)
        else:
            response = requests.get(
                self.dataset_info["URL"], timeout=self._download_timeout_seconds
            )
            response.raise_for_status()
            corpus_json = response.json()
            # Cache the download so later runs work offline and start faster.
            with open(filename, "w", encoding="utf-8") as f:
                json.dump(corpus_json, f, ensure_ascii=False, indent=4)

        if limit is not None and 0 < limit < len(corpus_json):
            # A private generator yields the same sample as seeding the
            # module-level one, but leaves the process-wide random state
            # untouched for every other user of `random`.
            corpus_json = random.Random(seed).sample(corpus_json, limit)

        corpus_list = []
        question_answer_pairs = []
        for item in corpus_json:
            # Each context entry unpacks to (title, sentences); only the
            # sentences contribute to the corpus text.
            corpus_list.extend(
                " ".join(sentences) for _title, sentences in item["context"]
            )
            question_answer_pairs.append(
                {
                    "question": item["question"],
                    "answer": item["answer"].lower(),
                    "type": item["type"],
                }
            )

        return corpus_list, question_answer_pairs