Feat: Adds dashboard application to parallel modal evals (#847)

## Description Adds dashboard application to parallel modal evals to enable fast retriever development/evaluation ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --------- Co-authored-by: lxobr <122801072+lxobr@users.noreply.github.com>
2025-05-21 09:07:02 +02:00 · 2025-05-21 09:07:02 +02:00 · 7eee769251
commit 7eee769251
parent 4c52ef62aa
1 changed files with 94 additions and 0 deletions
--- a/cognee/eval_framework/modal_eval_dashboard.py
+++ b/cognee/eval_framework/modal_eval_dashboard.py
@ -0,0 +1,94 @@
+import os
+import json
+import pandas as pd
+import subprocess
+import modal
+import streamlit as st
+
+# ----------------------------------------------------------------------------
+# Volume and Image Setup
+# ----------------------------------------------------------------------------
+metrics_volume = modal.Volume.from_name("evaluation_dashboard_results", create_if_missing=True)
+
+image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .pip_install("streamlit", "pandas", "plotly")
+    .add_local_file(__file__, "/root/serve_dashboard.py")
+)
+
+# ----------------------------------------------------------------------------
+# Define and Deploy the App
+# ----------------------------------------------------------------------------
+app = modal.App(
+    name="metrics-dashboard",
+    image=image,
+    volumes={"/data": metrics_volume},
+)
+
+
+@app.function()
+@modal.web_server(port=8000)
+def run():
+    """
+    Launch Streamlit server on port 8000 inside the container.
+    """
+    cmd = (
+        "streamlit run /root/serve_dashboard.py "
+        "--server.port 8000 "
+        "--server.enableCORS=false "
+        "--server.enableXsrfProtection=false"
+    )
+    subprocess.Popen(cmd, shell=True)
+
+
+# ----------------------------------------------------------------------------
+# Streamlit Dashboard Application Logic
+# ----------------------------------------------------------------------------
+def main():
+    st.set_page_config(page_title="Metrics Dashboard", layout="wide")
+    st.title("📊 Cognee Evaluations Dashboard")
+
+    data_path = "/data"
+    records = []
+
+    for filename in sorted(os.listdir(data_path)):
+        if not filename.endswith(".json"):
+            continue
+        base = filename.rsplit(".", 1)[0]
+        parts = base.split("_")
+        benchmark = parts[1] if len(parts) >= 3 else ""
+
+        full_path = os.path.join(data_path, filename)
+        with open(full_path, "r") as f:
+            items = json.load(f)
+        num_q = len(items)
+        total_em = sum(q["metrics"]["EM"]["score"] for q in items)
+        total_f1 = sum(q["metrics"]["f1"]["score"] for q in items)
+        total_corr = sum(q["metrics"]["correctness"]["score"] for q in items)
+        records.append(
+            {
+                "file": parts[0].upper() + "_____" + parts[2],
+                "benchmark": benchmark,
+                "questions": num_q,
+                "avg_EM": round(total_em / num_q, 4),
+                "avg_F1": round(total_f1 / num_q, 4),
+                "avg_correctness": round(total_corr / num_q, 4),
+            }
+        )
+
+    df = pd.DataFrame(records)
+    if df.empty:
+        st.warning("No JSON files found in the volume.")
+        return
+
+    st.subheader("Results by benchmark")
+    for bm, group in df.groupby("benchmark"):
+        st.markdown(f"### {bm}")
+        st.dataframe(
+            group[["file", "questions", "avg_EM", "avg_F1", "avg_correctness"]],
+            use_container_width=True,
+        )
+
+
+if __name__ == "__main__":
+    main()