cognee/evals/benchmark_summary_competition.json
Vasilije f65605b575
fix: Feature/cog 2648 evals update (#1221)
<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: lxobr <122801072+lxobr@users.noreply.github.com>
Co-authored-by: Hande <159312713+hande-k@users.noreply.github.com>
2025-08-08 20:23:09 +02:00

94 lines
1.6 KiB
JSON

[
{
"system": "Cognee",
"Human-like Correctness": 0.925,
"Human-like Correctness Error": [
0.911,
0.94
],
"DeepEval Correctness": 0.846,
"DeepEval Correctness Error": [
0.83,
0.863
],
"DeepEval EM": 0.687,
"DeepEval EM Error": [
0.661,
0.717
],
"DeepEval F1": 0.841,
"DeepEval F1 Error": [
0.821,
0.861
]
},
{
"system": "LightRAG",
"Human-like Correctness": 0.955,
"Human-like Correctness Error": [
0.944,
0.965
],
"DeepEval Correctness": 0.673,
"DeepEval Correctness Error": [
0.661,
0.684
],
"DeepEval EM": 0.0,
"DeepEval EM Error": [
0.0,
0.0
],
"DeepEval F1": 0.09,
"DeepEval F1 Error": [
0.087,
0.094
]
},
{
"system": "Mem0",
"Human-like Correctness": 0.722,
"Human-like Correctness Error": [
0.695,
0.747
],
"DeepEval Correctness": 0.541,
"DeepEval Correctness Error": [
0.524,
0.559
],
"DeepEval EM": 0.0,
"DeepEval EM Error": [
0.0,
0.0
],
"DeepEval F1": 0.12,
"DeepEval F1 Error": [
0.114,
0.127
]
},
{
"system": "Graphiti",
"Human-like Correctness": 0.884,
"Human-like Correctness Error": [
0.802,
0.954
],
"DeepEval Correctness": 0.74,
"DeepEval Correctness Error": [
0.659,
0.816
],
"DeepEval EM": 0.46,
"DeepEval EM Error": [
0.32,
0.6
],
"DeepEval F1": 0.695,
"DeepEval F1 Error": [
0.589,
0.797
]
}
]