Cog 692 run swe bench on ec2 (#25)
Mainly a tutorial and some small improvements to the evaluation code itself
commit 56673d360c
5 changed files with 179 additions and 42 deletions
@@ -1,3 +1,2 @@
-I need you to solve this issue by looking at the provided knowledge graph and
-generating a single patch file that I can apply directly to this repository using git apply.
+I need you to solve this issue by generating a single patch file that I can apply directly to this repository using git apply.
 Please respond with a single patch file in the following format.

@@ -0,0 +1,3 @@
+I need you to solve this issue by looking at the provided knowledge graph and
+generating a single patch file that I can apply directly to this repository using git apply.
+Please respond with a single patch file in the following format.
64 evals/EC2_README.md Normal file

@@ -0,0 +1,64 @@
## Creating the EC2 Instance

Create an EC2 instance with the `Ubuntu` image.

Many instance types will work; we used `m7a.2xlarge`. (Using more than 8 parallel processes does not seem to speed up the overall run, possibly because of Docker's own parallelism.)

DON'T FORGET TO ADD `500 GB storage`, or the evaluation run will run out of disk space.

Add a key pair for login and make sure you have access to the corresponding key file (`*.pem`).
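If you prefer the CLI over the console, a roughly equivalent launch looks like the sketch below. This is not part of the PR; the AMI ID, key pair name, and volume type are placeholders you must replace.

```bash
# Hypothetical sketch: launch a comparable instance with the AWS CLI.
# ami-xxxxxxxxxxxxxxxxx stands for an Ubuntu AMI in your region,
# my-key-pair for the key pair created above.
aws ec2 run-instances \
  --image-id ami-xxxxxxxxxxxxxxxxx \
  --instance-type m7a.2xlarge \
  --key-name my-key-pair \
  --count 1 \
  --block-device-mappings '[{"DeviceName":"/dev/sda1","Ebs":{"VolumeSize":500,"VolumeType":"gp3"}}]'
```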
## Accessing your instance and setup

To SSH into the instance, save your key pair file (`*.pem`) to an appropriate location, such as `~/.aws`. After launching the instance, open the Instance Summary and copy the "Public IPv4 DNS" address. Then run

`ssh -i PATH_TO_KEY ubuntu@IPv4ADDRESS`

to gain command-line access to the instance.

To copy your current state of cognee, go to the folder that contains `cognee` on your local machine, zip it to `cognee.zip`, and run:

`zip -r cognee.zip cognee`

`scp -i PATH_TO_KEY cognee.zip ubuntu@IPv4ADDRESS:cognee.zip`

Then unzip `cognee.zip` in your SSH session:

`sudo apt install unzip`

`unzip cognee.zip`

Then run:

`cd cognee`

`source evals/cloud/setup_ubuntu_instance.sh`

`sudo usermod -aG docker $USER`

Then disconnect and reconnect.

Confirm that `ubuntu` has been added to the `docker` group with

`groups | grep docker`
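As an alternative to the zip/scp/unzip steps above, `rsync` can copy the repository directly. This is only a sketch, not part of the PR, and the exclude patterns are illustrative:

```bash
# Hypothetical alternative to zip + scp: sync the local checkout to the instance,
# skipping typical local artifacts.
rsync -avz -e "ssh -i PATH_TO_KEY" \
  --exclude ".git" --exclude "venv" --exclude "__pycache__" \
  cognee/ ubuntu@IPv4ADDRESS:cognee/
```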
## Running SWE-bench

Enter a `screen` session and activate the virtual environment:

`screen`

`source venv/bin/activate`
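Since the full run takes a while, you may want to detach from the `screen` session with `Ctrl-A D` and reattach later with `screen -r` (standard `screen` behaviour, mentioned here only as a tip).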
Then, from the `cognee` directory, you can run the SWE-bench evaluation:

`cd cognee`

`python evals/eval_swe_bench.py --cognee_off --max_workers=N_CPUS`

Building the environment images should take roughly 17 minutes.

If the virtual environment wasn't set up correctly for some reason, just run the last few lines of `setup_ubuntu_instance.sh` manually.
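Once the harness finishes, you can copy the outputs back to your local machine; a sketch only, since the exact output file names depend on the run and are placeholders here:

```bash
# Hypothetical example: pull prediction/report JSON files back from the instance.
scp -i PATH_TO_KEY "ubuntu@IPv4ADDRESS:cognee/*.json" .
```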
33 evals/cloud/setup_ubuntu_instance.sh Normal file

@@ -0,0 +1,33 @@
sudo apt-get update -y
sudo apt-get install -y ca-certificates curl
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc

# Add the repository to Apt sources:
echo \
  "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
  $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
  sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update -y

sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

sudo docker run hello-world

sudo apt install -y unzip

sudo apt-get install -y python3-virtualenv

sudo add-apt-repository -y ppa:deadsnakes/ppa

sudo apt update -y

sudo apt install -y python3.11

virtualenv venv --python=python3.11

source venv/bin/activate
pip install poetry
poetry install
pip install swebench transformers sentencepiece datasets tiktoken protobuf
evals/eval_swe_bench.py

@@ -1,6 +1,7 @@
 import argparse
 import json
 import subprocess
+import sys
 from pathlib import Path

 from datasets import Dataset
@@ -25,11 +26,27 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.infrastructure.llm.prompts import read_query_prompt
 from evals.eval_utils import download_instances
-from evals.eval_utils import ingest_repos
-from evals.eval_utils import download_github_repo
-from evals.eval_utils import delete_repo

-async def generate_patch_with_cognee(instance):
+
+def check_install_package(package_name):
+    """
+    Check if a pip package is installed and install it if not.
+    Returns True if package is/was installed successfully, False otherwise.
+    """
+    try:
+        __import__(package_name)
+        return True
+    except ImportError:
+        try:
+            subprocess.check_call(
+                [sys.executable, "-m", "pip", "install", package_name]
+            )
+            return True
+        except subprocess.CalledProcessError:
+            return False
+
+
+async def generate_patch_with_cognee(instance, llm_client, search_type=SearchType.CHUNKS):

     await cognee.prune.prune_data()
     await cognee.prune.prune_system()
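The `check_install_package` helper added above does from Python roughly what one would do manually in the activated virtualenv, for example (illustrative only, using `swebench` as the sample package):

```bash
# Roughly the manual equivalent of check_install_package("swebench"):
python -c "import swebench" || pip install swebench
```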
@@ -60,46 +77,44 @@ async def generate_patch_with_cognee(instance):
     await render_graph(None, include_labels = True, include_nodes = True)

     problem_statement = instance['problem_statement']
-    instructions = read_query_prompt("patch_gen_instructions.txt")
+    instructions = read_query_prompt("patch_gen_kg_instructions.txt")

     graph_str = 'HERE WE SHOULD PASS THE TRIPLETS FROM GRAPHRAG'

-    prompt = "\n".join([
-        instructions,
-        "<patch>",
-        PATCH_EXAMPLE,
-        "</patch>",
-        "This is the knowledge graph:",
-        graph_str
-    ])
-
-    return 0
-
-    ''' :TODO: We have to find out how do we do the generation
-    llm_client = get_llm_client()
+    prompt = "\n".join(
+        [
+            problem_statement,
+            "<patch>",
+            PATCH_EXAMPLE,
+            "</patch>",
+            "This is the knowledge graph:",
+            graph_str,
+        ]
+    )
+
     answer_prediction = await llm_client.acreate_structured_output(
-        text_input=problem_statement,
-        system_prompt=prompt,
+        text_input=prompt,
+        system_prompt=instructions,
         response_model=str,
     )

     return answer_prediction
-    '''

-async def generate_patch_without_cognee(instance):
-    problem_statement = instance['problem_statement']
-    prompt = instance["text"]
-
-    llm_client = get_llm_client()
+async def generate_patch_without_cognee(instance, llm_client):
+    instructions = read_query_prompt("patch_gen_instructions.txt")

     answer_prediction = await llm_client.acreate_structured_output(
-        text_input=problem_statement,
-        system_prompt=prompt,
+        text_input=instance["text"],
+        system_prompt=instructions,
         response_model=str,
     )
     return answer_prediction


 async def get_preds(dataset, with_cognee=True):
+    llm_client = get_llm_client()
+
     if with_cognee:
         model_name = "with_cognee"
         pred_func = generate_patch_with_cognee
@@ -107,24 +122,34 @@ async def get_preds(dataset, with_cognee=True):
         model_name = "without_cognee"
         pred_func = generate_patch_without_cognee

-    for instance in dataset:
-        await pred_func(instance)
-
-    '''
-    preds = [{"instance_id": instance["instance_id"],
-              "model_patch": await pred_func(instance),
-              "model_name_or_path": model_name} for instance in dataset]
-    '''
-    return 0
+    futures = [
+        (instance["instance_id"], pred_func(instance, llm_client))
+        for instance in dataset
+    ]
+    model_patches = await asyncio.gather(*[x[1] for x in futures])
+
+    preds = [
+        {
+            "instance_id": instance_id,
+            "model_patch": model_patch,
+            "model_name_or_path": model_name,
+        }
+        for (instance_id, _), model_patch in zip(futures, model_patches)
+    ]
+
+    return preds


 async def main():
     parser = argparse.ArgumentParser(
         description="Run LLM predictions on SWE-bench dataset")
     parser.add_argument('--cognee_off', action='store_true')
+    parser.add_argument("--max_workers", type=int, required=True)
     args = parser.parse_args()

+    for dependency in ["transformers", "sentencepiece", "swebench"]:
+        check_install_package(dependency)
+
     if args.cognee_off:
         dataset_name = 'princeton-nlp/SWE-bench_Lite_bm25_13K'
         dataset = load_swebench_dataset(dataset_name, split='test')
@@ -147,12 +172,25 @@ async def main():
     with open(predictions_path, "w") as file:
         json.dump(preds, file)

-    subprocess.run(["python", "-m", "swebench.harness.run_evaluation",
-                    "--dataset_name", dataset_name,
-                    "--split", "test",
-                    "--predictions_path", predictions_path,
-                    "--max_workers", "1",
-                    "--run_id", "test_run"])
+    subprocess.run(
+        [
+            "python",
+            "-m",
+            "swebench.harness.run_evaluation",
+            "--dataset_name",
+            dataset_name,
+            "--split",
+            "test",
+            "--predictions_path",
+            predictions_path,
+            "--max_workers",
+            str(args.max_workers),
+            "--run_id",
+            "test_run",
+        ]
+    )


 if __name__ == "__main__":
     import asyncio
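For reference, the subprocess call above is equivalent to invoking the SWE-bench harness directly from the shell; in this sketch `preds.json` and the worker count are placeholders for the actual predictions path and `--max_workers` value:

```bash
# Equivalent shell invocation of the evaluation harness (placeholder values).
python -m swebench.harness.run_evaluation \
  --dataset_name princeton-nlp/SWE-bench_Lite_bm25_13K \
  --split test \
  --predictions_path preds.json \
  --max_workers 8 \
  --run_id test_run
```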