From f6ced4122a2d825a3ca5a222617b10f355727daf Mon Sep 17 00:00:00 2001
From: alekszievr <44192193+alekszievr@users.noreply.github.com>
Date: Wed, 26 Feb 2025 12:45:34 +0100
Subject: [PATCH] Test: test eval dashboard generation [COG-1234] (#570)

## Description

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin

## Summary by CodeRabbit

- **Tests**
  - Introduced a new test suite for validating the metrics dashboard generation.
  - Added tests for the `bootstrap_ci` function to ensure accurate calculations and handling of various input scenarios.
---
 .../unit/eval_framework/dashboard_test.py | 104 ++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 cognee/tests/unit/eval_framework/dashboard_test.py

diff --git a/cognee/tests/unit/eval_framework/dashboard_test.py b/cognee/tests/unit/eval_framework/dashboard_test.py
new file mode 100644
index 000000000..3fda78189
--- /dev/null
+++ b/cognee/tests/unit/eval_framework/dashboard_test.py
@@ -0,0 +1,104 @@
+import unittest
+from unittest.mock import patch
+import json
+import os
+import tempfile
+from evals.eval_framework.metrics_dashboard import generate_metrics_dashboard, bootstrap_ci
+import numpy as np
+
+
+class TestGenerateMetricsDashboard(unittest.TestCase):
+    def setUp(self):
+        self.test_data = [
+            {
+                "question": "What is AI?",
+                "answer": "Artificial Intelligence",
+                "golden_answer": "Artificial Intelligence",
+                "metrics": {
+                    "accuracy": {"score": 0.9, "reason": "Close enough"},
+                    "relevance": {"score": 0.8},
+                },
+            },
+            {
+                "question": "What is ML?",
+                "answer": "Machine Learning",
+                "golden_answer": "Machine Learning",
+                "metrics": {
+                    "accuracy": {"score": 0.95, "reason": "Exact match"},
+                    "relevance": {"score": 0.85},
+                },
+            },
+        ]
+
+        self.temp_json = tempfile.NamedTemporaryFile(delete=False, mode="w", encoding="utf-8")
+        json.dump(self.test_data, self.temp_json)
+        self.temp_json.close()
+        self.output_file = "test_dashboard.html"
+
+    def tearDown(self):
+        os.remove(self.temp_json.name)
+        if os.path.exists(self.output_file):
+            os.remove(self.output_file)
+
+    def test_generate_metrics_dashboard_valid_json(self):
+        """Test if the function processes valid JSON correctly and creates an output file."""
+        result = generate_metrics_dashboard(
+            self.temp_json.name, self.output_file, benchmark="Test Benchmark"
+        )
+
+        self.assertTrue(os.path.exists(self.output_file))
+        self.assertEqual(result, self.output_file)
+
+        with open(self.output_file, "r", encoding="utf-8") as f:
+            html_content = f.read()
+            self.assertIn("
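
For context on what the new `bootstrap_ci` tests exercise, below is a minimal, illustrative sketch of a bootstrap confidence interval over per-question metric scores. The function name `bootstrap_ci_sketch`, its parameters, and the resampling details are assumptions for illustration only; they are not the actual implementation in `evals.eval_framework.metrics_dashboard`.

```python
import numpy as np


def bootstrap_ci_sketch(scores, num_samples=10000, confidence_level=0.95, seed=42):
    """Illustrative bootstrap CI: resample scores with replacement, take the
    mean of each resample, and read the interval off the percentiles."""
    rng = np.random.default_rng(seed)
    scores = np.asarray(scores, dtype=float)
    means = np.array([
        rng.choice(scores, size=len(scores), replace=True).mean()
        for _ in range(num_samples)
    ])
    lower = np.percentile(means, (1 - confidence_level) / 2 * 100)
    upper = np.percentile(means, (1 + confidence_level) / 2 * 100)
    return scores.mean(), lower, upper


# Example with the accuracy scores from the test fixture above:
mean, lo, hi = bootstrap_ci_sketch([0.9, 0.95])
assert lo <= mean <= hi
```

A test along these lines would typically check that the returned mean matches the sample mean and that the interval bounds bracket it for normal inputs, plus how edge cases (empty or single-element score lists) are handled.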