graphiti/tests/evals/data/LongMemEval_Snippetization.ipynb
prestonrasmussen b35729643d add e2e eval
2025-04-08 12:24:27 -04:00

510 lines
20 KiB
Text

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "i9i9uUZ3pWQE",
"outputId": "84404bb8-5841-4f2f-dd87-7f909c6e95aa"
},
"outputs": [],
"source": [
"######## Installations - BE SURE TO MAKE YOUR OWN LOCAL VENV FIRST\n",
"\n",
"%pip install gdown pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KgzveXyAp35v"
},
"outputs": [],
"source": [
"######## Imports\n",
"\n",
"import csv\n",
"import json\n",
"import os\n",
"import tarfile\n",
"from datetime import datetime\n",
"\n",
"import gdown\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iwcBxgxgqES8",
"outputId": "4af8de17-57bd-4857-f49c-2ee5d39ed248"
},
"outputs": [],
"source": [
"######## Download the eval dataset from the official Google Drive source\n",
"\n",
"file_id = '1zJgtYRFhOh5zDQzzatiddfjYhFSnyQ80'\n",
"url = f'https://drive.google.com/uc?id={file_id}'\n",
"folder_path = './longmemeval_data'\n",
"file_path = os.path.join(folder_path, 'longmemeval_data.tar.gz')\n",
"\n",
"# If it doesn't exist, create a \"/.longmemeval_data/\" directory\n",
"if not os.path.exists(folder_path):\n",
" os.makedirs(folder_path)\n",
"\n",
"# Download the compressed dataset\n",
"if not os.path.exists(file_path):\n",
" gdown.download(url, file_path, quiet=False)\n",
"else:\n",
" print(f\"'{file_path}' already exists, skipping download.\")\n",
"\n",
"# Extract the tar.gz\n",
"if not os.path.exists(os.path.join(folder_path, 'longmemeval_oracle.json')):\n",
" with tarfile.open(file_path, 'r:gz') as tar:\n",
" tar.extractall(path=folder_path)\n",
"else:\n",
" print(\"'longmemeval_oracle.json' already exists, so skipping extraction.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_RjEZnk5v530"
},
"outputs": [],
"source": [
"######## Load the eval dataset\n",
"\n",
"lme_dataset_option = os.path.join(\n",
" folder_path, 'longmemeval_oracle.json'\n",
") # Can be _oracle, _s, or _m\n",
"lme_dataset_df = pd.read_json(lme_dataset_option)\n",
"lme_dataset_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"######## Method to save all of the snippets (or only firsts/lasts) of the specified multi-sessions to a CSV file\n",
"\n",
"\n",
"def snippetize_lme_dataset(lme_filename, max_num_previous_messages=5):\n",
" \"\"\"\n",
" Creates a csv where each row is a \"snippet\" from longmemeval. A snippet is a message and set of previous messages.\n",
" \"\"\"\n",
"\n",
" lme_dataset_option = os.path.join(folder_path, lme_filename)\n",
" lme_dataset_df = pd.read_json(lme_dataset_option)\n",
"\n",
" all_snippets = []\n",
" for index, row in lme_dataset_df.iterrows():\n",
" question_id = row['question_id']\n",
"\n",
" # Extract the haystack_sessions and dates\n",
" sessions = row['haystack_sessions']\n",
" session_dates = row['haystack_dates']\n",
"\n",
" # Combine into list of dictionaries\n",
" sessions_data = [\n",
" {'session': session, 'date': datetime.strptime(date, '%Y/%m/%d (%a) %H:%M')}\n",
" for session, date in zip(sessions, session_dates)\n",
" ]\n",
"\n",
" # Sort by date from earliest to latest\n",
" sessions_data.sort(key=lambda x: x['date'])\n",
"\n",
" all_snippets_this_session = []\n",
"\n",
" message_index_across_sessions = 0\n",
" for session_index, session_and_date in enumerate(sessions_data):\n",
" for message_index_within_session, message in enumerate(session_and_date['session']):\n",
" num_previous_messages = min(\n",
" max_num_previous_messages, message_index_across_sessions\n",
" )\n",
" previous_snippets = all_snippets_this_session[\n",
" message_index_across_sessions - num_previous_messages :\n",
" ]\n",
" previous_messages_only = [\n",
" {\n",
" 'role': previous_snippet['message']['role'],\n",
" 'content': previous_snippet['message']['content'],\n",
" }\n",
" for previous_snippet in previous_snippets\n",
" ]\n",
"\n",
" snippet = {\n",
" 'question_id': question_id,\n",
" 'question_type': row['question_type'],\n",
" 'multisession_index': index,\n",
" 'session_index': session_index,\n",
" 'message_index_within_session': message_index_within_session,\n",
" 'message_index_across_sessions': message_index_across_sessions,\n",
" 'session_date': session_and_date['date'],\n",
" 'message': message,\n",
" 'previous_messages': previous_messages_only,\n",
" 'num_previous_messages': num_previous_messages,\n",
" }\n",
"\n",
" if lme_filename == 'longmemeval_oracle.json':\n",
" snippet['message_has_answer'] = message['has_answer']\n",
"\n",
" all_snippets_this_session.append(snippet)\n",
" message_index_across_sessions += 1\n",
"\n",
" all_snippets.extend(all_snippets_this_session)\n",
"\n",
" snippetized_folder = os.path.join(folder_path, 'snippetized_data')\n",
" if not os.path.exists(snippetized_folder):\n",
" os.makedirs(snippetized_folder)\n",
"\n",
" filename = lme_filename.replace('.json', '_snippetized.csv')\n",
" filepath = os.path.join(snippetized_folder, filename)\n",
"\n",
" with open(filepath, 'w', newline='') as csvfile:\n",
" writer = csv.DictWriter(csvfile, fieldnames=all_snippets[0].keys())\n",
" writer.writeheader()\n",
" for snippet in all_snippets:\n",
" snippet['message'] = json.dumps(snippet['message'])\n",
" snippet['previous_messages'] = json.dumps(snippet['previous_messages'])\n",
" writer.writerow(snippet)\n",
"\n",
"\n",
"def snippetize_and_check(lme_filename):\n",
" folder_path = './longmemeval_data/snippetized_data'\n",
" file_path = os.path.join(folder_path, lme_filename.replace('.json', '_snippetized.csv'))\n",
" if not os.path.exists(file_path):\n",
" print(f'Snippetizing {lme_filename}...')\n",
" snippetize_lme_dataset(lme_filename)\n",
" else:\n",
" print(f'Skipping snippetization for {lme_filename} because it already exists.')\n",
"\n",
" # Check first few rows of the csv\n",
" df = pd.read_csv(file_path)\n",
" display(df.head(10))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-20T17:17:33.985547Z",
"start_time": "2025-03-20T17:17:33.569496Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Skipping snippetization for longmemeval_oracle.json because it already exists.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>question_id</th>\n",
" <th>question_type</th>\n",
" <th>multisession_index</th>\n",
" <th>session_index</th>\n",
" <th>message_index_within_session</th>\n",
" <th>message_index_across_sessions</th>\n",
" <th>session_date</th>\n",
" <th>message</th>\n",
" <th>previous_messages</th>\n",
" <th>num_previous_messages</th>\n",
" <th>message_has_answer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"user\", \"content\": \"I'm thinking of g...</td>\n",
" <td>[]</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"assistant\", \"content\": \"Choosing the...</td>\n",
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"user\", \"content\": \"I've been doing s...</td>\n",
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"assistant\", \"content\": \"That's great...</td>\n",
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"user\", \"content\": \"I'll definitely a...</td>\n",
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"assistant\", \"content\": \"Advanced pai...</td>\n",
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
" <td>5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"user\", \"content\": \"I'll definitely a...</td>\n",
" <td>[{\"role\": \"assistant\", \"content\": \"Choosing th...</td>\n",
" <td>5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"assistant\", \"content\": \"Congratulati...</td>\n",
" <td>[{\"role\": \"user\", \"content\": \"I've been doing ...</td>\n",
" <td>5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"user\", \"content\": \"That's really hel...</td>\n",
" <td>[{\"role\": \"assistant\", \"content\": \"That's grea...</td>\n",
" <td>5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>gpt4_2655b836</td>\n",
" <td>temporal-reasoning</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>2023-04-10 14:47:00</td>\n",
" <td>{\"role\": \"assistant\", \"content\": \"Congratulati...</td>\n",
" <td>[{\"role\": \"user\", \"content\": \"I'll definitely ...</td>\n",
" <td>5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" question_id question_type multisession_index session_index \\\n",
"0 gpt4_2655b836 temporal-reasoning 0 0 \n",
"1 gpt4_2655b836 temporal-reasoning 0 0 \n",
"2 gpt4_2655b836 temporal-reasoning 0 0 \n",
"3 gpt4_2655b836 temporal-reasoning 0 0 \n",
"4 gpt4_2655b836 temporal-reasoning 0 0 \n",
"5 gpt4_2655b836 temporal-reasoning 0 0 \n",
"6 gpt4_2655b836 temporal-reasoning 0 0 \n",
"7 gpt4_2655b836 temporal-reasoning 0 0 \n",
"8 gpt4_2655b836 temporal-reasoning 0 0 \n",
"9 gpt4_2655b836 temporal-reasoning 0 0 \n",
"\n",
" message_index_within_session message_index_across_sessions \\\n",
"0 0 0 \n",
"1 1 1 \n",
"2 2 2 \n",
"3 3 3 \n",
"4 4 4 \n",
"5 5 5 \n",
"6 6 6 \n",
"7 7 7 \n",
"8 8 8 \n",
"9 9 9 \n",
"\n",
" session_date message \\\n",
"0 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"I'm thinking of g... \n",
"1 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"Choosing the... \n",
"2 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"I've been doing s... \n",
"3 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"That's great... \n",
"4 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"I'll definitely a... \n",
"5 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"Advanced pai... \n",
"6 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"I'll definitely a... \n",
"7 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"Congratulati... \n",
"8 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"That's really hel... \n",
"9 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"Congratulati... \n",
"\n",
" previous_messages num_previous_messages \\\n",
"0 [] 0 \n",
"1 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 1 \n",
"2 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 2 \n",
"3 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 3 \n",
"4 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 4 \n",
"5 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 5 \n",
"6 [{\"role\": \"assistant\", \"content\": \"Choosing th... 5 \n",
"7 [{\"role\": \"user\", \"content\": \"I've been doing ... 5 \n",
"8 [{\"role\": \"assistant\", \"content\": \"That's grea... 5 \n",
"9 [{\"role\": \"user\", \"content\": \"I'll definitely ... 5 \n",
"\n",
" message_has_answer \n",
"0 False \n",
"1 False \n",
"2 True \n",
"3 False \n",
"4 False \n",
"5 False \n",
"6 False \n",
"7 False \n",
"8 False \n",
"9 False "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"lme_filename = 'longmemeval_oracle.json'\n",
"snippetize_and_check(lme_filename)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lme_filename = 'longmemeval_s.json'\n",
"snippetize_and_check(lme_filename)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lme_filename = 'longmemeval_m.json'\n",
"snippetize_and_check(lme_filename)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}