Cog 692 run swe bench on ec2 (#25)
Mainly a tutorial and some small improvements to the evaluation code itself
commit 56673d360c
5 changed files with 179 additions and 42 deletions
@@ -1,3 +1,2 @@
-I need you to solve this issue by looking at the provided knowledge graph and
-generating a single patch file that I can apply directly to this repository using git apply.
+I need you to solve this issue by generating a single patch file that I can apply directly to this repository using git apply.
 Please respond with a single patch file in the following format.

@@ -0,0 +1,3 @@
+I need you to solve this issue by looking at the provided knowledge graph and
+generating a single patch file that I can apply directly to this repository using git apply.
+Please respond with a single patch file in the following format.
64 evals/EC2_README.md Normal file

@@ -0,0 +1,64 @@
## Creating the EC2 Instance

Create an EC2 instance with the `Ubuntu` image.

Many instance types will work; we used `m7a.2xlarge`. (Using more than 8 parallel processes does not seem to speed up the overall run, possibly because of Docker's own parallelism.)

DON'T FORGET TO ADD `500 GB storage`, or the evaluation run will run out of disk space.

Add a key pair for login and make sure you have access to the corresponding key file (`*.pem`).
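If you prefer the CLI over the console, a roughly equivalent launch looks like the sketch below. This is not part of the PR; the AMI ID, key pair name, and volume type are placeholders you must replace.

```bash
# Hypothetical sketch: launch a comparable instance with the AWS CLI.
# ami-xxxxxxxxxxxxxxxxx stands for an Ubuntu AMI in your region,
# my-key-pair for the key pair created above.
aws ec2 run-instances \
  --image-id ami-xxxxxxxxxxxxxxxxx \
  --instance-type m7a.2xlarge \
  --key-name my-key-pair \
  --count 1 \
  --block-device-mappings '[{"DeviceName":"/dev/sda1","Ebs":{"VolumeSize":500,"VolumeType":"gp3"}}]'
```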
## Accessing your instance and setup

To SSH into the instance, save your key pair file (`*.pem`) to an appropriate location, such as `~/.aws`. After launching the instance, open the Instance Summary and copy the "Public IPv4 DNS" address. Then run

`ssh -i PATH_TO_KEY ubuntu@IPv4ADDRESS`

to gain command-line access to the instance.

To copy your current state of cognee, go to the folder that contains `cognee` on your local machine, zip it to `cognee.zip`, and run:

`zip -r cognee.zip cognee`

`scp -i PATH_TO_KEY cognee.zip ubuntu@IPv4ADDRESS:cognee.zip`

Then unzip `cognee.zip` in your SSH session:

`sudo apt install unzip`

`unzip cognee.zip`

Then run:

`cd cognee`

`source evals/cloud/setup_ubuntu_instance.sh`

`sudo usermod -aG docker $USER`

Then disconnect and reconnect.

Confirm that `ubuntu` has been added to the `docker` group with

`groups | grep docker`
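As an alternative to the zip/scp/unzip steps above, `rsync` can copy the repository directly. This is only a sketch, not part of the PR, and the exclude patterns are illustrative:

```bash
# Hypothetical alternative to zip + scp: sync the local checkout to the instance,
# skipping typical local artifacts.
rsync -avz -e "ssh -i PATH_TO_KEY" \
  --exclude ".git" --exclude "venv" --exclude "__pycache__" \
  cognee/ ubuntu@IPv4ADDRESS:cognee/
```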
## Running SWE-bench

Enter a `screen` session and activate the virtual environment:

`screen`

`source venv/bin/activate`
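Since the full run takes a while, you may want to detach from the `screen` session with `Ctrl-A D` and reattach later with `screen -r` (standard `screen` behaviour, mentioned here only as a tip).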
Then, from the `cognee` directory, you can run the SWE-bench evaluation:

`cd cognee`

`python evals/eval_swe_bench.py --cognee_off --max_workers=N_CPUS`

Building the environment images should take roughly 17 minutes.

If the virtual environment wasn't set up correctly for some reason, just run the last few lines of `setup_ubuntu_instance.sh` manually.
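Once the harness finishes, you can copy the outputs back to your local machine; a sketch only, since the exact output file names depend on the run and are placeholders here:

```bash
# Hypothetical example: pull prediction/report JSON files back from the instance.
scp -i PATH_TO_KEY "ubuntu@IPv4ADDRESS:cognee/*.json" .
```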
33 evals/cloud/setup_ubuntu_instance.sh Normal file

@@ -0,0 +1,33 @@
sudo apt-get update -y
sudo apt-get install -y ca-certificates curl
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc

# Add the repository to Apt sources:
echo \
  "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
  $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
  sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update -y

sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

sudo docker run hello-world

sudo apt install -y unzip

sudo apt-get install -y python3-virtualenv

sudo add-apt-repository -y ppa:deadsnakes/ppa

sudo apt update -y

sudo apt install -y python3.11

virtualenv venv --python=python3.11

source venv/bin/activate
pip install poetry
poetry install
pip install swebench transformers sentencepiece datasets tiktoken protobuf
evals/eval_swe_bench.py

@@ -1,6 +1,7 @@
 import argparse
 import json
 import subprocess
+import sys
 from pathlib import Path

 from datasets import Dataset
@@ -25,11 +26,27 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.infrastructure.llm.prompts import read_query_prompt
 from evals.eval_utils import download_instances
-from evals.eval_utils import ingest_repos
-from evals.eval_utils import download_github_repo
-from evals.eval_utils import delete_repo

-async def generate_patch_with_cognee(instance):
+
+def check_install_package(package_name):
+    """
+    Check if a pip package is installed and install it if not.
+    Returns True if package is/was installed successfully, False otherwise.
+    """
+    try:
+        __import__(package_name)
+        return True
+    except ImportError:
+        try:
+            subprocess.check_call(
+                [sys.executable, "-m", "pip", "install", package_name]
+            )
+            return True
+        except subprocess.CalledProcessError:
+            return False
+
+
+async def generate_patch_with_cognee(instance, llm_client, search_type=SearchType.CHUNKS):

     await cognee.prune.prune_data()
     await cognee.prune.prune_system()
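The `check_install_package` helper added above does from Python roughly what one would do manually in the activated virtualenv, for example (illustrative only, using `swebench` as the sample package):

```bash
# Roughly the manual equivalent of check_install_package("swebench"):
python -c "import swebench" || pip install swebench
```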
@@ -60,46 +77,44 @@ async def generate_patch_with_cognee(instance):
     await render_graph(None, include_labels = True, include_nodes = True)

     problem_statement = instance['problem_statement']
-    instructions = read_query_prompt("patch_gen_instructions.txt")
+    instructions = read_query_prompt("patch_gen_kg_instructions.txt")

     graph_str = 'HERE WE SHOULD PASS THE TRIPLETS FROM GRAPHRAG'

-    prompt = "\n".join([
-        instructions,
-        "<patch>",
-        PATCH_EXAMPLE,
-        "</patch>",
-        "This is the knowledge graph:",
-        graph_str
-    ])
-
-    return 0
-
-    ''' :TODO: We have to find out how do we do the generation
-    llm_client = get_llm_client()
+    prompt = "\n".join(
+        [
+            problem_statement,
+            "<patch>",
+            PATCH_EXAMPLE,
+            "</patch>",
+            "This is the knowledge graph:",
+            graph_str,
+        ]
+    )
+
     answer_prediction = await llm_client.acreate_structured_output(
-        text_input=problem_statement,
-        system_prompt=prompt,
+        text_input=prompt,
+        system_prompt=instructions,
         response_model=str,
     )

     return answer_prediction
-    '''

-async def generate_patch_without_cognee(instance):
-    problem_statement = instance['problem_statement']
-    prompt = instance["text"]
-
-    llm_client = get_llm_client()
+async def generate_patch_without_cognee(instance, llm_client):
+    instructions = read_query_prompt("patch_gen_instructions.txt")

     answer_prediction = await llm_client.acreate_structured_output(
-        text_input=problem_statement,
-        system_prompt=prompt,
+        text_input=instance["text"],
+        system_prompt=instructions,
         response_model=str,
     )
     return answer_prediction


 async def get_preds(dataset, with_cognee=True):
+    llm_client = get_llm_client()
+
     if with_cognee:
         model_name = "with_cognee"
         pred_func = generate_patch_with_cognee
@@ -107,24 +122,34 @@ async def get_preds(dataset, with_cognee=True):
         model_name = "without_cognee"
         pred_func = generate_patch_without_cognee

-    for instance in dataset:
-        await pred_func(instance)
-
-    '''
-    preds = [{"instance_id": instance["instance_id"],
-              "model_patch": await pred_func(instance),
-              "model_name_or_path": model_name} for instance in dataset]
-    '''
-    return 0
+    futures = [
+        (instance["instance_id"], pred_func(instance, llm_client))
+        for instance in dataset
+    ]
+    model_patches = await asyncio.gather(*[x[1] for x in futures])
+
+    preds = [
+        {
+            "instance_id": instance_id,
+            "model_patch": model_patch,
+            "model_name_or_path": model_name,
+        }
+        for (instance_id, _), model_patch in zip(futures, model_patches)
+    ]
+
+    return preds


 async def main():
     parser = argparse.ArgumentParser(
         description="Run LLM predictions on SWE-bench dataset")
     parser.add_argument('--cognee_off', action='store_true')
+    parser.add_argument("--max_workers", type=int, required=True)
     args = parser.parse_args()

+    for dependency in ["transformers", "sentencepiece", "swebench"]:
+        check_install_package(dependency)
+
     if args.cognee_off:
         dataset_name = 'princeton-nlp/SWE-bench_Lite_bm25_13K'
         dataset = load_swebench_dataset(dataset_name, split='test')
@@ -147,12 +172,25 @@ async def main():
     with open(predictions_path, "w") as file:
         json.dump(preds, file)

-    subprocess.run(["python", "-m", "swebench.harness.run_evaluation",
-                    "--dataset_name", dataset_name,
-                    "--split", "test",
-                    "--predictions_path", predictions_path,
-                    "--max_workers", "1",
-                    "--run_id", "test_run"])
+    subprocess.run(
+        [
+            "python",
+            "-m",
+            "swebench.harness.run_evaluation",
+            "--dataset_name",
+            dataset_name,
+            "--split",
+            "test",
+            "--predictions_path",
+            predictions_path,
+            "--max_workers",
+            str(args.max_workers),
+            "--run_id",
+            "test_run",
+        ]
+    )


 if __name__ == "__main__":
     import asyncio
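For reference, the subprocess call above is equivalent to invoking the SWE-bench harness directly from the shell; in this sketch `preds.json` and the worker count are placeholders for the actual predictions path and `--max_workers` value:

```bash
# Equivalent shell invocation of the evaluation harness (placeholder values).
python -m swebench.harness.run_evaluation \
  --dataset_name princeton-nlp/SWE-bench_Lite_bm25_13K \
  --split test \
  --predictions_path preds.json \
  --max_workers 8 \
  --run_id test_run
```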