{ "correctness": { "scores": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 1.0, 0.8, 0.5, 1.0, 0.2, 1.0, 1.0, 0.0, 1.0, 0.0, 0.5, 0.0, 0.0, 0.8, 0.0, 1.0, 1.0, 1.0, 1.0, 0.5, 1.0, 0.5, 0.2, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0 ], "mean": 0.7380000000000001, "ci_lower": 0.626, "ci_upper": 0.8419999999999999 } }