cognee/evals/multimetric_qa_eval_run.py
alekszievr 5e076689ad
Feat: [COG-1074] fix multimetric eval bug (#463)
* feat: make tasks a configurable argument in the cognify function

* fix: add data points task

* Ugly hack for multi-metric eval bug

* some cleanup

---------

Co-authored-by: lxobr <122801072+lxobr@users.noreply.github.com>
2025-01-28 13:05:22 +01:00

75 lines
1.9 KiB
Python

import argparse
import json
import os
import subprocess
import sys
import threading
from typing import List
def run_command(command: List[str]):
    """Run *command* as a subprocess, streaming its output while it runs.

    Each line the child writes to stdout is echoed to this process's stdout.
    Each line it writes to stderr is echoed to this process's stderr with an
    ``Error: `` prefix.

    Args:
        command: Program and arguments, passed to ``subprocess.Popen`` as a list.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero code.
        OSError: If the child cannot be started (e.g. executable not found).
    """

    def _forward_stderr(stream):
        # Runs on a helper thread so stderr is drained concurrently with stdout.
        for line in stream:
            print(f"Error: {line.rstrip()}", file=sys.stderr)

    # The context manager closes both pipes and waits for the child on exit,
    # and nothing here touches `process` if Popen itself fails to start.
    with subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1
    ) as process:
        # Reading the two pipes alternately with blocking readline() calls can
        # deadlock once the child fills one pipe while we wait on the other,
        # so stderr gets its own reader.
        stderr_thread = threading.Thread(
            target=_forward_stderr, args=(process.stderr,), daemon=True
        )
        stderr_thread.start()
        try:
            for line in process.stdout:
                print(line.rstrip())
        finally:
            # Let the stderr reader finish before the pipes are closed.
            stderr_thread.join()

    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command)
def run_evals_for_paramsfile(params_file, out_dir):
    """Run ``evals/run_qa_eval.py`` once per metric listed in *params_file*.

    The JSON parameters are loaded once. For every entry in its
    ``"metric_names"`` list, a temporary copy of the parameters is written
    next to *params_file* with ``"metric_names"`` narrowed to that single
    metric. The eval script is run against that copy, which is then removed.

    Args:
        params_file: Path to a JSON file containing a ``"metric_names"`` list.
        out_dir: Directory forwarded to the eval script as ``--out_dir``.
            When ``None`` the flag is omitted.

    Raises:
        subprocess.CalledProcessError: If an eval run exits with a non-zero code.
    """
    with open(params_file, "r", encoding="utf-8") as file:
        parameters = json.load(file)

    # Split off the extension so the temp name always differs from
    # params_file, even when the name does not contain ".json".
    root, ext = os.path.splitext(params_file)

    for metric in parameters["metric_names"]:
        # Build a fresh dict per metric so the loaded parameters stay untouched.
        params = {**parameters, "metric_names": [metric]}
        temp_paramfile = f"{root}_{metric}{ext}"

        command = [
            "python",
            "evals/run_qa_eval.py",
            "--params_file",
            temp_paramfile,
        ]
        if out_dir is not None:
            # A None entry in the argv list would make Popen raise TypeError.
            command += ["--out_dir", out_dir]

        try:
            with open(temp_paramfile, "w", encoding="utf-8") as file:
                json.dump(params, file)
            run_command(command)
        finally:
            # Remove the temp file even when the eval run fails.
            if os.path.exists(temp_paramfile):
                os.remove(temp_paramfile)
def main():
    """Parse the command-line options and start the per-metric eval runs."""
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--params_file",
        required=True,
        type=str,
        help="Which dataset to evaluate on",
    )
    cli.add_argument("--out_dir", help="Dir to save eval results", type=str)

    options = cli.parse_args()
    run_evals_for_paramsfile(params_file=options.params_file, out_dir=options.out_dir)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()