diff --git a/evals/deepeval_metrics.py b/evals/deepeval_metrics.py
new file mode 100644
index 000000000..03f9f6dba
--- /dev/null
+++ b/evals/deepeval_metrics.py
@@ -0,0 +1,17 @@
+"""Shared DeepEval metric definitions for the evaluation suite."""
+from deepeval.metrics import GEval
+from deepeval.test_case import LLMTestCaseParams
+
+# LLM-as-judge correctness check: gpt-4o-mini grades the test case's
+# ACTUAL_OUTPUT against its EXPECTED_OUTPUT using the steps below.
+correctness_metric = GEval(
+    name="Correctness",
+    model="gpt-4o-mini",
+    evaluation_params=[
+        LLMTestCaseParams.ACTUAL_OUTPUT,
+        LLMTestCaseParams.EXPECTED_OUTPUT,
+    ],
+    evaluation_steps=[
+        "Determine whether the actual output is factually correct based on the expected output."
+    ],
+)