# cognee/evals/multimetric_qa_eval_run.py

import argparse
import json
import os
import subprocess
import sys
import threading
from typing import List


def run_command(command: List[str]):
    """Run *command* as a subprocess, forwarding its output as it is produced.

    Each line the child writes to stdout is printed to this process's stdout
    with trailing whitespace stripped. Each line it writes to stderr is printed
    to this process's stderr, prefixed with ``"Error: "``.

    Args:
        command: Program and arguments, passed to ``subprocess.Popen`` as a
            list (no shell is involved).

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero
            return code.
        OSError: If the child cannot be started (e.g. ``FileNotFoundError``
            when the executable does not exist).
    """

    def _forward_stderr(stream):
        # Runs on a helper thread so stderr keeps draining while the main
        # thread is blocked reading stdout.
        for stderr_line in stream:
            print(f"Error: {stderr_line.rstrip()}", file=sys.stderr)

    # The context manager closes both pipes and waits for the child on exit.
    # If Popen itself raises there is nothing to clean up, so the original
    # error propagates untouched.
    with subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1
    ) as process:
        # Reading the two pipes alternately with blocking readline() calls can
        # deadlock: the child may block writing to one pipe while we wait on
        # the other. Draining stderr concurrently avoids that.
        stderr_thread = threading.Thread(
            target=_forward_stderr, args=(process.stderr,), daemon=True
        )
        stderr_thread.start()
        try:
            for stdout_line in process.stdout:
                print(stdout_line.rstrip())
        except BaseException:
            # Interrupted while forwarding output (e.g. KeyboardInterrupt):
            # stop the child so the join/wait below cannot hang on it.
            process.kill()
            raise
        finally:
            stderr_thread.join()
        returncode = process.wait()

    if returncode != 0:
        raise subprocess.CalledProcessError(returncode, command)


def run_evals_for_paramsfile(params_file, out_dir):
    """Run ``evals/run_qa_eval.py`` once for each metric in a JSON params file.

    The parameters are loaded from *params_file*. For every entry of its
    ``"metric_names"`` list, a temporary copy of the parameters restricted to
    that single metric is written next to *params_file* (named
    ``<root>_<metric><ext>``), the eval script is run against it, and the
    temporary file is removed again.

    Args:
        params_file: Path to a JSON file containing a ``"metric_names"`` list.
        out_dir: Value forwarded to the eval script as ``--out_dir``. When
            ``None`` the flag is omitted instead of putting ``None`` into the
            argument list.

    Raises:
        subprocess.CalledProcessError: Propagated from ``run_command`` when an
            eval run exits with a non-zero status; remaining metrics are not run.
    """
    with open(params_file, "r") as file:
        parameters = json.load(file)

    # Split off the extension instead of str.replace(".json", ...): replace()
    # returns the original path when there is no ".json" in it (so the source
    # file would be overwritten and then deleted) and also rewrites ".json"
    # occurring in directory names.
    root, ext = os.path.splitext(params_file)

    for metric in parameters["metric_names"]:
        # Shallow copy so the loaded parameters are not mutated between runs.
        params = {**parameters, "metric_names": [metric]}
        temp_paramfile = f"{root}_{metric}{ext}"

        command = [
            "python",
            "evals/run_qa_eval.py",
            "--params_file",
            temp_paramfile,
        ]
        if out_dir is not None:
            command += ["--out_dir", out_dir]

        try:
            with open(temp_paramfile, "w") as file:
                json.dump(params, file)

            run_command(command)
        finally:
            # Clean up even when the eval run fails.
            if os.path.exists(temp_paramfile):
                os.remove(temp_paramfile)


def main():
    """Parse the command-line arguments and run the evals for the params file.

    Recognized options:
        --params_file: Required. Path handed to ``run_evals_for_paramsfile``.
        --out_dir: Optional. Directory for eval results; ``None`` when omitted.
    """
    parser = argparse.ArgumentParser()

    # The previous help text ("Which dataset to evaluate on") described a
    # different option; this argument is the JSON parameters file.
    parser.add_argument(
        "--params_file",
        type=str,
        required=True,
        help="Path to the JSON parameters file (must contain 'metric_names')",
    )
    parser.add_argument("--out_dir", type=str, help="Dir to save eval results")

    args = parser.parse_args()

    run_evals_for_paramsfile(args.params_file, args.out_dir)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()