* QA eval dataset as argument, with hotpot and 2wikimultihop as options; JSON schema validation for datasets
* Load dataset file by filename; outsource utilities
* Restructure metric selection
* Add comprehensiveness, diversity, and empowerment metrics
* Add promptfoo as an option
* Refactor RAG solution in eval
* Implement LLM-as-a-judge metrics in a uniform way
* Use requests.get instead of wget
* Clean up promptfoo config template
* Get promptfoo path instead of hardcoding it
* Add LLM-as-a-judge prompts
* Minor refactor, fixes, and logger usage
82 lines · 2.6 KiB · Python
import json
from pathlib import Path

import requests
from jsonschema import ValidationError, validate

from cognee.root_dir import get_absolute_path

# Supported QA eval datasets, keyed by the names accepted as arguments.
qa_datasets = {
    "hotpotqa": {
        "filename": "hotpot_dev_fullwiki_v1.json",
        "URL": "http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json",
    },
    "2wikimultihop": {
        "filename": "data/dev.json",
        "URL": "https://www.dropbox.com/scl/fi/heid2pkiswhfaqr5g0piw/data.zip?rlkey=ira57daau8lxfj022xvk1irju&e=1",
    },
}

# Each dataset must be a JSON array of objects carrying at least an answer,
# a question, and a context list; extra fields are allowed.
qa_json_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "answer": {"type": "string"},
            "question": {"type": "string"},
            "context": {"type": "array"},
        },
        "required": ["answer", "question", "context"],
        "additionalProperties": True,
    },
}

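# For illustration, a HotpotQA-style record that passes the schema above
# (the concrete values here are an assumption, not taken from this file):
#
#     {
#         "question": "Which magazine was started first, Arthur's Magazine or First for Women?",
#         "answer": "Arthur's Magazine",
#         "context": [["Arthur's Magazine", ["Arthur's Magazine (1844-1846) was an American literary periodical."]]],
#         "_id": "...",  # extra fields are tolerated via additionalProperties
#     }
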
def download_qa_dataset(dataset_name: str, filepath: Path):
    if dataset_name not in qa_datasets:
        raise ValueError(f"{dataset_name} is not a supported dataset.")

    url = qa_datasets[dataset_name]["URL"]

    # The 2wikimultihop archive is hosted on Dropbox and is not fetched
    # automatically here; the caller must download and unzip it manually.
    if dataset_name == "2wikimultihop":
        raise Exception(
            "Please download the 2wikimultihop dataset (data.zip) manually from "
            "https://www.dropbox.com/scl/fi/heid2pkiswhfaqr5g0piw/data.zip?rlkey=ira57daau8lxfj022xvk1irju&e=1 "
            "and unzip it."
        )

    # Stream the response in 8 KiB chunks so large dataset files are never
    # held in memory in full.
    response = requests.get(url, stream=True)

    if response.status_code == 200:
        with open(filepath, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
        print(f"Dataset {dataset_name} downloaded and saved to {filepath}")
    else:
        print(f"Failed to download {dataset_name}. Status code: {response.status_code}")

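# A minimal direct-use sketch (the target path below is hypothetical;
# load_qa_dataset derives the real one from cognee's data root):
#
#     download_qa_dataset("hotpotqa", Path(".data/hotpot_dev_fullwiki_v1.json"))
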
def load_qa_dataset(dataset_name_or_filename: str) -> list[dict]:
    if dataset_name_or_filename in qa_datasets:
        # Known dataset name: resolve its file under cognee's data root and
        # download it on first use.
        dataset_name = dataset_name_or_filename
        filename = qa_datasets[dataset_name]["filename"]

        data_root_dir = Path(get_absolute_path("../.data"))
        data_root_dir.mkdir(parents=True, exist_ok=True)

        filepath = data_root_dir / filename
        if not filepath.exists():
            download_qa_dataset(dataset_name, filepath)
    else:
        # Otherwise treat the argument as a path to a local dataset file.
        filename = dataset_name_or_filename
        filepath = Path(filename)

    with open(filepath, "r") as file:
        dataset = json.load(file)

    try:
        validate(instance=dataset, schema=qa_json_schema)
    except ValidationError as e:
        raise ValidationError(f"Invalid QA dataset: {e.message}") from e

    return dataset
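# Usage sketch (illustrative): a registry name resolves to a file under the
# data root and is downloaded on first use; anything else is treated as a
# path to a local JSON file. Both are schema-validated before being returned.
#
#     dataset = load_qa_dataset("hotpotqa")        # by registry name
#     dataset = load_qa_dataset("my_own_qa.json")  # or by local filename (hypothetical)
#     print(len(dataset), dataset[0]["question"])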