cognee/evals/promptfoo_metrics.py
Feat/cog 950 improve metric selection (#435)
* QA eval dataset as an argument, with hotpot and 2wikimultihop as options. JSON schema validation for datasets.

* Load dataset file by filename, outsource utilities

* restructure metric selection

* Add comprehensiveness, diversity and empowerment metrics

* add promptfoo as an option

* refactor RAG solution in eval

* LLM as a judge metrics implemented in a uniform way (see the sketch after this list)

* Use requests.get instead of wget

* clean up promptfoo config template

* minor fixes

* get promptfoo path instead of hardcoding

* minor fixes

* Add LLM as a judge prompts

* Minor refactor and logger usage
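
A minimal sketch of what that uniform wiring could look like, assuming each metric differs only in the judge prompt handed to PromptfooMetric; the prompt strings and the import path are illustrative stand-ins, not the repository's actual prompts:

from evals.promptfoo_metrics import PromptfooMetric  # assumed import path

# Hypothetical judge prompts; the real ones live in the LLM-as-a-judge prompt files
# added by this change.
judge_prompts = {
    "comprehensiveness": "How completely does the answer cover every aspect of the question?",
    "diversity": "How varied are the perspectives and evidence the answer draws on?",
    "empowerment": "How well does the answer equip the reader to reason about the topic?",
}

# One PromptfooMetric per metric name, all built the same way.
metrics = {name: PromptfooMetric(judge_prompt=prompt) for name, prompt in judge_prompts.items()}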

import json
import os
import shutil

import yaml

from evals.promptfoo_wrapper import PromptfooWrapper


class PromptfooMetric:
    def __init__(self, judge_prompt):
        # Resolve the promptfoo CLI from PATH instead of hardcoding its location.
        promptfoo_path = shutil.which("promptfoo")
        self.wrapper = PromptfooWrapper(promptfoo_path=promptfoo_path)
        self.judge_prompt = judge_prompt

    async def measure(self, instances, context_provider):
        # Load the promptfoo config template bundled with the eval suite.
        with open(os.path.join(os.getcwd(), "evals/promptfoo_config_template.yaml"), "r") as file:
            config = yaml.safe_load(file)
config["defaultTest"] = [{"assert": {"type": "llm_rubric", "value": self.judge_prompt}}]

        # Fill config file with test cases
        tests = []
        for instance in instances:
            context = await context_provider(instance)
            test = {
                "vars": {
                    "name": instance["question"][:15],
                    "question": instance["question"],
                    "context": context,
                }
            }
            tests.append(test)

        config["tests"] = tests

        # Write the updated config out as a new YAML file for this run.
        updated_yaml_file_path = os.path.join(os.getcwd(), "config_with_context.yaml")
        with open(updated_yaml_file_path, "w") as file:
            yaml.dump(config, file)
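
        # Sketch of what a generated config_with_context.yaml might contain, assuming the
        # template supplies the provider section (values below are illustrative):
        #
        #   defaultTest:
        #     assert:
        #       - type: llm-rubric
        #         value: <judge prompt>
        #   tests:
        #     - vars:
        #         name: <first 15 characters of the question>
        #         question: <full question>
        #         context: <context returned by context_provider>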

        self.wrapper.run_eval(
            prompt_file=os.path.join(os.getcwd(), "evals/promptfooprompt.json"),
            config_file=updated_yaml_file_path,
            out_format="json",
        )

        # Read and parse the JSON results written by promptfoo.
        file_path = os.path.join(os.getcwd(), "benchmark_results.json")
        with open(file_path, "r") as file:
            results = json.load(file)

        # Keep the aggregate score promptfoo reports for the (single) evaluated prompt.
        self.score = results["results"]["prompts"][0]["metrics"]["score"]
        return self.score
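
A usage sketch (not part of the module): it assumes the class is importable as evals.promptfoo_metrics, and that promptfoo plus the config template and prompt file are in place; the questions and the context provider are made-up placeholders.

import asyncio

from evals.promptfoo_metrics import PromptfooMetric


async def dummy_context_provider(instance):
    # Stand-in for the real retrieval step (e.g. a cognee search); just echoes the question.
    return f"Context for: {instance['question']}"


async def main():
    metric = PromptfooMetric(judge_prompt="Rate how comprehensively the answer addresses the question.")
    instances = [
        {"question": "Who founded the studio that produced Spirited Away?"},
        {"question": "Which river flows through both Vienna and Budapest?"},
    ]
    score = await metric.measure(instances, dummy_context_provider)
    print(f"promptfoo aggregate score: {score}")


if __name__ == "__main__":
    asyncio.run(main())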