510 lines
20 KiB
Text
510 lines
20 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "i9i9uUZ3pWQE",
|
|
"outputId": "84404bb8-5841-4f2f-dd87-7f909c6e95aa"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"######## Installations - BE SURE TO MAKE YOUR OWN LOCAL VENV FIRST\n",
|
|
"\n",
|
|
"%pip install gdown pandas"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "KgzveXyAp35v"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"######## Imports\n",
|
|
"\n",
|
|
"import csv\n",
|
|
"import json\n",
|
|
"import os\n",
|
|
"import tarfile\n",
|
|
"from datetime import datetime\n",
|
|
"\n",
|
|
"import gdown\n",
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "iwcBxgxgqES8",
|
|
"outputId": "4af8de17-57bd-4857-f49c-2ee5d39ed248"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"######## Download the eval dataset from the official Google Drive source\n",
"\n",
"file_id = '1zJgtYRFhOh5zDQzzatiddfjYhFSnyQ80'\n",
"url = f'https://drive.google.com/uc?id={file_id}'\n",
"folder_path = './longmemeval_data'\n",
"file_path = os.path.join(folder_path, 'longmemeval_data.tar.gz')\n",
"\n",
"# Create the \"./longmemeval_data/\" directory if it doesn't exist yet\n",
"os.makedirs(folder_path, exist_ok=True)\n",
"\n",
"# Download the compressed dataset (skipped when the archive is already on disk)\n",
"if not os.path.exists(file_path):\n",
"    gdown.download(url, file_path, quiet=False)\n",
"else:\n",
"    print(f\"'{file_path}' already exists, skipping download.\")\n",
"\n",
"# Extract the tar.gz (skipped when the oracle file has already been extracted)\n",
"if not os.path.exists(os.path.join(folder_path, 'longmemeval_oracle.json')):\n",
"    # The archive comes from the network, so use the 'data' extraction filter where the running\n",
"    # Python provides it; older interpreters without extraction filters fall back to the\n",
"    # previous unfiltered behaviour.\n",
"    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}\n",
"    with tarfile.open(file_path, 'r:gz') as tar:\n",
"        tar.extractall(path=folder_path, **extract_kwargs)\n",
"else:\n",
"    print(\"'longmemeval_oracle.json' already exists, so skipping extraction.\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "_RjEZnk5v530"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"######## Load the eval dataset\n",
"\n",
"# Dataset variant to preview: the file suffix can be _oracle, _s, or _m\n",
"lme_dataset_option = os.path.join(folder_path, 'longmemeval_oracle.json')\n",
"lme_dataset_df = pd.read_json(lme_dataset_option)\n",
"lme_dataset_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"######## Method to save all of the snippets of the specified multi-sessions to a CSV file\n",
"\n",
"\n",
"def snippetize_lme_dataset(lme_filename, max_num_previous_messages=5):\n",
"    \"\"\"\n",
"    Creates a csv where each row is a \"snippet\" from longmemeval. A snippet is a message plus up to\n",
"    `max_num_previous_messages` of the messages that precede it for the same question.\n",
"\n",
"    The sessions of each question are sorted by date (earliest first) before snippets are built,\n",
"    and the window of previous messages is taken across session boundaries.\n",
"\n",
"    Args:\n",
"        lme_filename: Name of a dataset JSON file located in the notebook-level `folder_path`,\n",
"            e.g. 'longmemeval_oracle.json'.\n",
"        max_num_previous_messages: Maximum number of preceding messages stored with each snippet.\n",
"\n",
"    Output:\n",
"        Writes `<folder_path>/snippetized_data/<lme_filename with '.json' -> '_snippetized.csv'>`.\n",
"        The `message` and `previous_messages` columns hold JSON-encoded strings. For\n",
"        'longmemeval_oracle.json' an extra `message_has_answer` column is written.\n",
"        Returns None.\n",
"    \"\"\"\n",
"\n",
"    lme_dataset_option = os.path.join(folder_path, lme_filename)\n",
"    lme_dataset_df = pd.read_json(lme_dataset_option)\n",
"\n",
"    include_has_answer = lme_filename == 'longmemeval_oracle.json'\n",
"\n",
"    # Columns are declared up front (rather than read off the first snippet) so that an empty\n",
"    # dataset produces a header-only CSV instead of an IndexError.\n",
"    fieldnames = [\n",
"        'question_id',\n",
"        'question_type',\n",
"        'multisession_index',\n",
"        'session_index',\n",
"        'message_index_within_session',\n",
"        'message_index_across_sessions',\n",
"        'session_date',\n",
"        'message',\n",
"        'previous_messages',\n",
"        'num_previous_messages',\n",
"    ]\n",
"    if include_has_answer:\n",
"        fieldnames.append('message_has_answer')\n",
"\n",
"    snippetized_folder = os.path.join(folder_path, 'snippetized_data')\n",
"    os.makedirs(snippetized_folder, exist_ok=True)\n",
"\n",
"    filename = lme_filename.replace('.json', '_snippetized.csv')\n",
"    filepath = os.path.join(snippetized_folder, filename)\n",
"\n",
"    # Write to a temporary file and move it into place only once every row has been written.\n",
"    # snippetize_and_check() treats an existing CSV as complete, so an interrupted run must not\n",
"    # leave a truncated file at `filepath`.\n",
"    temp_filepath = filepath + '.tmp'\n",
"    try:\n",
"        # utf-8 matches the default encoding pd.read_csv uses when the file is read back.\n",
"        with open(temp_filepath, 'w', newline='', encoding='utf-8') as csvfile:\n",
"            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
"            writer.writeheader()\n",
"\n",
"            for index, row in lme_dataset_df.iterrows():\n",
"                # Pair each haystack session with its parsed date\n",
"                sessions_data = [\n",
"                    {'session': session, 'date': datetime.strptime(date, '%Y/%m/%d (%a) %H:%M')}\n",
"                    for session, date in zip(row['haystack_sessions'], row['haystack_dates'])\n",
"                ]\n",
"\n",
"                # Sort by date from earliest to latest\n",
"                sessions_data.sort(key=lambda x: x['date'])\n",
"\n",
"                # Role/content of every message already emitted for this question, in order.\n",
"                # Rows are streamed to disk, so this is the only per-question state kept around.\n",
"                messages_so_far = []\n",
"\n",
"                for session_index, session_and_date in enumerate(sessions_data):\n",
"                    for message_index_within_session, message in enumerate(\n",
"                        session_and_date['session']\n",
"                    ):\n",
"                        message_index_across_sessions = len(messages_so_far)\n",
"                        num_previous_messages = min(\n",
"                            max_num_previous_messages, message_index_across_sessions\n",
"                        )\n",
"                        # Slice from an explicit start index; `[-0:]` would return the whole list.\n",
"                        previous_messages_only = messages_so_far[\n",
"                            message_index_across_sessions - num_previous_messages :\n",
"                        ]\n",
"\n",
"                        snippet = {\n",
"                            'question_id': row['question_id'],\n",
"                            'question_type': row['question_type'],\n",
"                            'multisession_index': index,\n",
"                            'session_index': session_index,\n",
"                            'message_index_within_session': message_index_within_session,\n",
"                            'message_index_across_sessions': message_index_across_sessions,\n",
"                            'session_date': session_and_date['date'],\n",
"                            'message': json.dumps(message),\n",
"                            'previous_messages': json.dumps(previous_messages_only),\n",
"                            'num_previous_messages': num_previous_messages,\n",
"                        }\n",
"\n",
"                        if include_has_answer:\n",
"                            snippet['message_has_answer'] = message['has_answer']\n",
"\n",
"                        writer.writerow(snippet)\n",
"                        messages_so_far.append(\n",
"                            {'role': message['role'], 'content': message['content']}\n",
"                        )\n",
"    except BaseException:\n",
"        # Also covers a kernel interrupt: drop the partial file, then re-raise unchanged.\n",
"        if os.path.exists(temp_filepath):\n",
"            os.remove(temp_filepath)\n",
"        raise\n",
"\n",
"    os.replace(temp_filepath, filepath)\n",
|
|
"\n",
|
|
"\n",
|
"def snippetize_and_check(lme_filename):\n",
"    \"\"\"\n",
"    Snippetizes `lme_filename` unless its CSV already exists, then displays the CSV's first 10 rows.\n",
"\n",
"    Args:\n",
"        lme_filename: Name of a dataset JSON file located in the notebook-level `folder_path`,\n",
"            e.g. 'longmemeval_oracle.json'.\n",
"    \"\"\"\n",
"    # Build the path from the shared `folder_path` exactly as snippetize_lme_dataset() does, so the\n",
"    # existence check and the writer always refer to the same file.\n",
"    snippetized_folder = os.path.join(folder_path, 'snippetized_data')\n",
"    snippetized_path = os.path.join(\n",
"        snippetized_folder, lme_filename.replace('.json', '_snippetized.csv')\n",
"    )\n",
"    if not os.path.exists(snippetized_path):\n",
"        print(f'Snippetizing {lme_filename}...')\n",
"        snippetize_lme_dataset(lme_filename)\n",
"    else:\n",
"        print(f'Skipping snippetization for {lme_filename} because it already exists.')\n",
"\n",
"    # Check first few rows of the csv; only those rows are parsed, not the whole file\n",
"    preview_df = pd.read_csv(snippetized_path, nrows=10)\n",
"    display(preview_df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-03-20T17:17:33.985547Z",
|
|
"start_time": "2025-03-20T17:17:33.569496Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Skipping snippetization for longmemeval_oracle.json because it already exists.\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>question_id</th>\n",
|
|
" <th>question_type</th>\n",
|
|
" <th>multisession_index</th>\n",
|
|
" <th>session_index</th>\n",
|
|
" <th>message_index_within_session</th>\n",
|
|
" <th>message_index_across_sessions</th>\n",
|
|
" <th>session_date</th>\n",
|
|
" <th>message</th>\n",
|
|
" <th>previous_messages</th>\n",
|
|
" <th>num_previous_messages</th>\n",
|
|
" <th>message_has_answer</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"user\", \"content\": \"I'm thinking of g...</td>\n",
|
|
" <td>[]</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"assistant\", \"content\": \"Choosing the...</td>\n",
|
|
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"user\", \"content\": \"I've been doing s...</td>\n",
|
|
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"assistant\", \"content\": \"That's great...</td>\n",
|
|
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"user\", \"content\": \"I'll definitely a...</td>\n",
|
|
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"assistant\", \"content\": \"Advanced pai...</td>\n",
|
|
" <td>[{\"role\": \"user\", \"content\": \"I'm thinking of ...</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"user\", \"content\": \"I'll definitely a...</td>\n",
|
|
" <td>[{\"role\": \"assistant\", \"content\": \"Choosing th...</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>7</td>\n",
|
|
" <td>7</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"assistant\", \"content\": \"Congratulati...</td>\n",
|
|
" <td>[{\"role\": \"user\", \"content\": \"I've been doing ...</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"user\", \"content\": \"That's really hel...</td>\n",
|
|
" <td>[{\"role\": \"assistant\", \"content\": \"That's grea...</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>gpt4_2655b836</td>\n",
|
|
" <td>temporal-reasoning</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>2023-04-10 14:47:00</td>\n",
|
|
" <td>{\"role\": \"assistant\", \"content\": \"Congratulati...</td>\n",
|
|
" <td>[{\"role\": \"user\", \"content\": \"I'll definitely ...</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" question_id question_type multisession_index session_index \\\n",
|
|
"0 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"1 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"2 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"3 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"4 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"5 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"6 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"7 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"8 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"9 gpt4_2655b836 temporal-reasoning 0 0 \n",
|
|
"\n",
|
|
" message_index_within_session message_index_across_sessions \\\n",
|
|
"0 0 0 \n",
|
|
"1 1 1 \n",
|
|
"2 2 2 \n",
|
|
"3 3 3 \n",
|
|
"4 4 4 \n",
|
|
"5 5 5 \n",
|
|
"6 6 6 \n",
|
|
"7 7 7 \n",
|
|
"8 8 8 \n",
|
|
"9 9 9 \n",
|
|
"\n",
|
|
" session_date message \\\n",
|
|
"0 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"I'm thinking of g... \n",
|
|
"1 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"Choosing the... \n",
|
|
"2 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"I've been doing s... \n",
|
|
"3 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"That's great... \n",
|
|
"4 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"I'll definitely a... \n",
|
|
"5 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"Advanced pai... \n",
|
|
"6 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"I'll definitely a... \n",
|
|
"7 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"Congratulati... \n",
|
|
"8 2023-04-10 14:47:00 {\"role\": \"user\", \"content\": \"That's really hel... \n",
|
|
"9 2023-04-10 14:47:00 {\"role\": \"assistant\", \"content\": \"Congratulati... \n",
|
|
"\n",
|
|
" previous_messages num_previous_messages \\\n",
|
|
"0 [] 0 \n",
|
|
"1 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 1 \n",
|
|
"2 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 2 \n",
|
|
"3 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 3 \n",
|
|
"4 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 4 \n",
|
|
"5 [{\"role\": \"user\", \"content\": \"I'm thinking of ... 5 \n",
|
|
"6 [{\"role\": \"assistant\", \"content\": \"Choosing th... 5 \n",
|
|
"7 [{\"role\": \"user\", \"content\": \"I've been doing ... 5 \n",
|
|
"8 [{\"role\": \"assistant\", \"content\": \"That's grea... 5 \n",
|
|
"9 [{\"role\": \"user\", \"content\": \"I'll definitely ... 5 \n",
|
|
"\n",
|
|
" message_has_answer \n",
|
|
"0 False \n",
|
|
"1 False \n",
|
|
"2 True \n",
|
|
"3 False \n",
|
|
"4 False \n",
|
|
"5 False \n",
|
|
"6 False \n",
|
|
"7 False \n",
|
|
"8 False \n",
|
|
"9 False "
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"lme_filename = 'longmemeval_oracle.json'\n",
|
|
"snippetize_and_check(lme_filename)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"lme_filename = 'longmemeval_s.json'\n",
|
|
"snippetize_and_check(lme_filename)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"lme_filename = 'longmemeval_m.json'\n",
|
|
"snippetize_and_check(lme_filename)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
}
|