Merge branch 'dev' into dataset-permissions
This commit is contained in:
commit
c68175d3f5
5 changed files with 100 additions and 9 deletions
|
|
@ -58,8 +58,8 @@ def get_delete_router() -> APIRouter:
|
||||||
)
|
)
|
||||||
results.append(result)
|
results.append(result)
|
||||||
else:
|
else:
|
||||||
# Handle uploaded file
|
# Handle uploaded file by accessing its file attribute
|
||||||
result = await cognee_delete(file, dataset_name=dataset_name, mode=mode)
|
result = await cognee_delete(file.file, dataset_name=dataset_name, mode=mode)
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
|
||||||
if len(results) == 1:
|
if len(results) == 1:
|
||||||
|
|
|
||||||
94
cognee/eval_framework/modal_eval_dashboard.py
Normal file
94
cognee/eval_framework/modal_eval_dashboard.py
Normal file
|
|
@ -0,0 +1,94 @@
|
||||||
|
import os
import json
import pandas as pd
import subprocess
import modal
import streamlit as st

# ----------------------------------------------------------------------------
# Volume and Image Setup
# ----------------------------------------------------------------------------
# Persistent Modal volume holding the evaluation JSON result files; created on
# first deploy if it does not already exist.
metrics_volume = modal.Volume.from_name("evaluation_dashboard_results", create_if_missing=True)

# Container image: slim Debian with the dashboard's runtime dependencies.
# This very file is copied into the image as /root/serve_dashboard.py so the
# deployed web server can re-run it as the Streamlit entry point.
image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install("streamlit", "pandas", "plotly")
    .add_local_file(__file__, "/root/serve_dashboard.py")
)

# ----------------------------------------------------------------------------
# Define and Deploy the App
# ----------------------------------------------------------------------------
# Modal app definition; mounts the metrics volume at /data inside containers.
app = modal.App(
    name="metrics-dashboard",
    image=image,
    volumes={"/data": metrics_volume},
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.function()
@modal.web_server(port=8000)
def run():
    """Start the Streamlit dashboard server on port 8000 inside the container."""
    # CORS and XSRF protection are disabled; presumably Modal's proxy fronts
    # the server — confirm before exposing this endpoint any other way.
    command = (
        "streamlit run /root/serve_dashboard.py "
        "--server.port 8000 "
        "--server.enableCORS=false "
        "--server.enableXsrfProtection=false"
    )
    # Popen (not run) so the function returns while Streamlit keeps serving.
    subprocess.Popen(command, shell=True)
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
# Streamlit Dashboard Application Logic
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
def main():
    """Render the evaluation-metrics dashboard from JSON result files in /data.

    Each ``*.json`` file is expected to be named
    ``<prefix>_<benchmark>_<suffix>.json`` and to contain a list of question
    records carrying ``metrics.EM/f1/correctness`` scores. Files that do not
    match the naming scheme, or that contain no questions, are skipped instead
    of crashing the page (previously they raised IndexError / ZeroDivisionError).
    """
    st.set_page_config(page_title="Metrics Dashboard", layout="wide")
    st.title("📊 Cognee Evaluations Dashboard")

    data_path = "/data"
    records = []

    for filename in sorted(os.listdir(data_path)):
        if not filename.endswith(".json"):
            continue
        base = filename.rsplit(".", 1)[0]
        parts = base.split("_")
        # Guard: parts[2] was indexed unconditionally below, so any filename
        # with fewer than three '_'-separated parts crashed the dashboard.
        if len(parts) < 3:
            continue
        benchmark = parts[1]

        full_path = os.path.join(data_path, filename)
        with open(full_path, "r") as f:
            items = json.load(f)
        num_q = len(items)
        # Guard: an empty result file would otherwise divide by zero.
        if num_q == 0:
            continue

        total_em = sum(q["metrics"]["EM"]["score"] for q in items)
        total_f1 = sum(q["metrics"]["f1"]["score"] for q in items)
        total_corr = sum(q["metrics"]["correctness"]["score"] for q in items)
        records.append(
            {
                "file": parts[0].upper() + "_____" + parts[2],
                "benchmark": benchmark,
                "questions": num_q,
                "avg_EM": round(total_em / num_q, 4),
                "avg_F1": round(total_f1 / num_q, 4),
                "avg_correctness": round(total_corr / num_q, 4),
            }
        )

    df = pd.DataFrame(records)
    if df.empty:
        st.warning("No JSON files found in the volume.")
        return

    # One table per benchmark so runs on different benchmarks aren't mixed.
    st.subheader("Results by benchmark")
    for bm, group in df.groupby("benchmark"):
        st.markdown(f"### {bm}")
        st.dataframe(
            group[["file", "questions", "avg_EM", "avg_F1", "avg_correctness"]],
            use_container_width=True,
        )


if __name__ == "__main__":
    main()
|
||||||
|
|
@ -40,8 +40,7 @@ class GenericAPIAdapter(LLMInterface):
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f"""Use the given format to
|
"content": f"""{text_input}""",
|
||||||
extract information from the following input: {text_input}. """,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ class OllamaAPIAdapter(LLMInterface):
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f"Use the given format to extract information from the following input: {text_input}",
|
"content": f"{text_input}",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
|
|
||||||
|
|
@ -63,8 +63,7 @@ class OpenAIAdapter(LLMInterface):
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f"""Use the given format to
|
"content": f"""{text_input}""",
|
||||||
extract information from the following input: {text_input}. """,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
|
@ -91,8 +90,7 @@ class OpenAIAdapter(LLMInterface):
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f"""Use the given format to
|
"content": f"""{text_input}""",
|
||||||
extract information from the following input: {text_input}. """,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue