Feat: Adds dashboard application to parallel modal evals (#847)

## Description
Adds a dashboard application to the parallel Modal evals to enable fast retriever development and evaluation.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: lxobr <122801072+lxobr@users.noreply.github.com>
2025-05-21 09:07:02 +02:00 · 2025-05-21 09:07:02 +02:00 · 7eee769251
commit 7eee769251
parent 4c52ef62aa
1 changed files with 94 additions and 0 deletions
--- a/cognee/eval_framework/modal_eval_dashboard.py
+++ b/cognee/eval_framework/modal_eval_dashboard.py
@ -0,0 +1,94 @@
 import os
 import json
 import pandas as pd
 import subprocess
 import modal
 import streamlit as st
 # ----------------------------------------------------------------------------
 # Modal resources: results volume, container image, and app definition
 # ----------------------------------------------------------------------------
 # Named volume holding the evaluation result files; it is created on first
 # use because of create_if_missing=True.
 metrics_volume = modal.Volume.from_name(
     "evaluation_dashboard_results",
     create_if_missing=True,
 )

 # Build the image one layer at a time: slim Debian with Python 3.11, then the
 # dashboard dependencies, and finally a copy of this very file, which is the
 # script that `streamlit run` executes inside the container.
 image = modal.Image.debian_slim(python_version="3.11")
 image = image.pip_install("streamlit", "pandas", "plotly")
 image = image.add_local_file(__file__, "/root/serve_dashboard.py")

 # The app attaches the results volume at /data, the directory main() reads.
 app = modal.App(
     name="metrics-dashboard",
     image=image,
     volumes={"/data": metrics_volume},
 )
@app.function()
@modal.web_server(port=8000)
def run():
    """Start the Streamlit dashboard as a background process.

    The server is spawned with ``subprocess.Popen`` and is not waited on, so
    this function returns right away while Streamlit keeps serving. The port
    given on the command line (8000) is the same one declared in the
    ``modal.web_server`` decorator, and the script path is where the image
    definition copies this file.
    """
    streamlit_args = (
        "streamlit run /root/serve_dashboard.py",
        "--server.port 8000",
        "--server.enableCORS=false",
        "--server.enableXsrfProtection=false",
    )
    # Joined with single spaces this is the exact shell command line to run.
    launch_command = " ".join(streamlit_args)
    subprocess.Popen(launch_command, shell=True)
 # ----------------------------------------------------------------------------
 # Streamlit Dashboard Application Logic
 # ----------------------------------------------------------------------------
 def _parse_result_filename(filename):
     """Derive the display label and benchmark name from a result file name.

     The extension is dropped and the remainder is split on ``_``. With at
     least three parts, the label is ``PARTS[0].upper() + "_____" + parts[2]``
     and the benchmark is ``parts[1]``. With fewer parts the whole stem is
     used as the label and the benchmark is the empty string, so unexpected
     names are still listed instead of raising ``IndexError``.

     Returns:
         A ``(label, benchmark)`` tuple of strings.
     """
     stem = filename.rsplit(".", 1)[0]
     parts = stem.split("_")
     if len(parts) < 3:
         return stem, ""
     return parts[0].upper() + "_____" + parts[2], parts[1]


 def _summarize_items(items):
     """Aggregate the per-question scores of one result file.

     Each element of ``items`` is read as
     ``item["metrics"][<metric>]["score"]`` for the metrics ``EM``, ``f1`` and
     ``correctness``.

     Returns:
         A dict with ``questions`` (the item count) and the rounded means
         ``avg_EM``, ``avg_F1`` and ``avg_correctness``; ``None`` when
         ``items`` is empty, because no average can be computed.

     Raises:
         KeyError, TypeError: If ``items`` does not have the expected shape.
     """
     num_q = len(items)
     if num_q == 0:
         return None
     summary = {"questions": num_q}
     metric_columns = (
         ("avg_EM", "EM"),
         ("avg_F1", "f1"),
         ("avg_correctness", "correctness"),
     )
     for column, metric in metric_columns:
         total = sum(q["metrics"][metric]["score"] for q in items)
         summary[column] = round(total / num_q, 4)
     return summary


 def main():
     """Render the evaluations dashboard from the JSON files under ``/data``.

     Every ``*.json`` file in the directory is loaded, its per-question EM, F1
     and correctness scores are averaged, and the resulting rows are shown in
     one table per benchmark (the benchmark is taken from the file name).

     A file that cannot be read, is not valid JSON, lacks the expected score
     keys, or contains no questions is skipped with an on-page warning rather
     than aborting the whole page. A missing data directory is treated like
     an empty one.

     NOTE(review): results written to the volume after this container started
     may need a volume reload to become visible here — confirm against the
     Modal Volumes documentation.
     """
     st.set_page_config(page_title="Metrics Dashboard", layout="wide")
     st.title("📊 Cognee Evaluations Dashboard")

     data_path = "/data"
     # Fall through to the "no files" warning instead of a traceback when the
     # directory does not exist (e.g. the script is run outside the container).
     filenames = sorted(os.listdir(data_path)) if os.path.isdir(data_path) else []

     records = []
     for filename in filenames:
         if not filename.endswith(".json"):
             continue
         full_path = os.path.join(data_path, filename)
         try:
             with open(full_path, "r", encoding="utf-8") as f:
                 summary = _summarize_items(json.load(f))
         except (OSError, ValueError, KeyError, TypeError) as err:
             # ValueError covers both invalid JSON and undecodable bytes.
             st.warning(f"Skipping {filename}: could not read metrics ({err!r}).")
             continue
         if summary is None:
             st.warning(f"Skipping {filename}: it contains no questions.")
             continue
         label, benchmark = _parse_result_filename(filename)
         records.append({"file": label, "benchmark": benchmark, **summary})

     df = pd.DataFrame(records)
     if df.empty:
         st.warning("No JSON files found in the volume.")
         return

     st.subheader("Results by benchmark")
     for bm, group in df.groupby("benchmark"):
         st.markdown(f"### {bm}")
         st.dataframe(
             group[["file", "questions", "avg_EM", "avg_F1", "avg_correctness"]],
             use_container_width=True,
         )
 # Script entry point: render the dashboard when this file is executed as a
 # script. run() starts it that way through `streamlit run` on the copy of
 # this file placed at /root/serve_dashboard.py.
 if __name__ == "__main__":
    main()