* QA eval dataset as argument, with hotpot and 2wikimultihop as options; JSON schema validation for datasets
* Load dataset file by filename, outsource utilities
* Restructure metric selection
* Add comprehensiveness, diversity and empowerment metrics
* Add promptfoo as an option
* Refactor RAG solution in eval
* LLM-as-a-judge metrics implemented in a uniform way
* Use requests.get instead of wget
* Clean up promptfoo config template
* Minor fixes
* Get promptfoo path instead of hardcoding
* Minor fixes
* Add LLM-as-a-judge prompts
* Minor refactor and logger usage
53 lines · 1.8 KiB · Python
import json
import os
import shutil

import yaml

from evals.promptfoo_wrapper import PromptfooWrapper


class PromptfooMetric:
    def __init__(self, judge_prompt):
        # Resolve the promptfoo executable from PATH rather than hardcoding its location.
        promptfoo_path = shutil.which("promptfoo")
        self.wrapper = PromptfooWrapper(promptfoo_path=promptfoo_path)
        self.judge_prompt = judge_prompt

    async def measure(self, instances, context_provider):
        # Load the promptfoo config template and attach the judge prompt as an
        # LLM-rubric assertion applied to every test case.
        with open(os.path.join(os.getcwd(), "evals/promptfoo_config_template.yaml"), "r") as file:
            config = yaml.safe_load(file)

        config["defaultTest"] = [{"assert": {"type": "llm_rubric", "value": self.judge_prompt}}]
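
        # Illustrative sketch only (not an authoritative promptfoo schema): once the
        # loop below has filled in the test cases, the generated config roughly looks like
        #
        #   defaultTest: [{assert: {type: llm_rubric, value: <judge prompt>}}]
        #   tests:
        #     - vars:
        #         name: <first 15 chars of the question>
        #         question: <question>
        #         context: <retrieved context>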
        # Fill the config with one test case per dataset instance.
        tests = []
        for instance in instances:
            context = await context_provider(instance)
            test = {
                "vars": {
                    "name": instance["question"][:15],
                    "question": instance["question"],
                    "context": context,
                }
            }
            tests.append(test)
        config["tests"] = tests

        # Write the updated config back to disk and run promptfoo against it.
        updated_yaml_file_path = os.path.join(os.getcwd(), "config_with_context.yaml")
        with open(updated_yaml_file_path, "w") as file:
            yaml.dump(config, file)

        self.wrapper.run_eval(
            prompt_file=os.path.join(os.getcwd(), "evals/promptfooprompt.json"),
            config_file=updated_yaml_file_path,
            out_format="json",
        )
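
        # The parsing below assumes benchmark_results.json roughly follows this shape
        # (only the fields read here are shown; an assumption, not a documented format):
        #
        #   {"results": {"prompts": [{"metrics": {"score": <aggregate score>}}]}}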

        file_path = os.path.join(os.getcwd(), "benchmark_results.json")

        # Read and parse the JSON file
        with open(file_path, "r") as file:
            results = json.load(file)

        self.score = results["results"]["prompts"][0]["metrics"]["score"]
        return self.score
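
For context, here is a minimal sketch of how this metric might be driven from a separate script. The module path evals.promptfoo_metric, the fetch_context helper, the judge prompt, and the instance contents are assumptions made for illustration; actually running it also requires promptfoo on PATH plus the evals/promptfoo_config_template.yaml and evals/promptfooprompt.json files referenced above.

import asyncio

from evals.promptfoo_metric import PromptfooMetric  # assumed module path for the class above


async def fetch_context(instance):
    # Hypothetical context provider: return retrieved passages for the question.
    return "Retrieved passages for: " + instance["question"]


async def main():
    # Illustrative judge prompt; the real prompts live elsewhere in the evals package.
    metric = PromptfooMetric("Rate the answer's comprehensiveness on a 0-1 scale.")
    instances = [{"question": "Who directed the 2017 adaptation of It?"}]
    score = await metric.measure(instances, fetch_context)
    print("promptfoo judge score:", score)


if __name__ == "__main__":
    asyncio.run(main())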