diff --git a/cognee/infrastructure/llm/prompts/patch_gen_instructions.txt b/cognee/infrastructure/llm/prompts/patch_gen_instructions.txt index ebbb03f75..5e7e48dda 100644 --- a/cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +++ b/cognee/infrastructure/llm/prompts/patch_gen_instructions.txt @@ -1,3 +1,2 @@ -I need you to solve this issue by looking at the provided edges retrieved from a knowledge graph and -generate a single patch file that I can apply directly to this repository using git apply. +I need you to solve this issue by generating a single patch file that I can apply directly to this repository using git apply. Please respond with a single patch file in the following format. \ No newline at end of file diff --git a/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt b/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt new file mode 100644 index 000000000..1553753ab --- /dev/null +++ b/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt @@ -0,0 +1,3 @@ +I need you to solve this issue by looking at the provided knowledge graph and +generating a single patch file that I can apply directly to this repository using git apply. +Please respond with a single patch file in the following format. \ No newline at end of file diff --git a/evals/EC2_README.md b/evals/EC2_README.md new file mode 100644 index 000000000..d6a937ed7 --- /dev/null +++ b/evals/EC2_README.md @@ -0,0 +1,64 @@ +## Creating the EC2 Instance + +Create an EC2 Instance with the + +`Ubuntu Image` + +Many instance types will work, we used: + +`m7a.2xlarge` # more than 8 parallel processes doesn't seem to speed up overall process. Maybe to do with docker parallelism? 
+ +DON'T FORGET TO ADD + +`500 GB storage` + +Otherwise the evaluation run will run out of disk space. + +Add a key pair login for which you have access to the corresponding key file (*.pem). + +## Accessing your instance and setup + +To ssh into the instance, you have to save your key pair file (*.pem) to an appropriate location, such as ~/.aws. After launching the instance, you can open the Instance Summary and retrieve the "Public IPv4 DNS" address. Then run + +`ssh -i PATH_TO_KEY ubuntu@IPv4ADDRESS` + +to gain command line access to the instance. + +To copy your current state of cognee, go to the folder that contains "cognee" on your local machine, zip it to cognee.zip and run: + +`zip -r cognee.zip cognee` +`scp -i PATH_TO_KEY cognee.zip ubuntu@IPv4ADDRESS:cognee.zip` + +And unzip cognee.zip in your SSH session: + +`sudo apt install unzip` +`unzip cognee.zip` + +Then run: +`cd cognee` +`source evals/cloud/setup_ubuntu_instance.sh` + +`sudo usermod -aG docker $USER` + +Disconnect and reconnect so that the new group membership takes effect. + +Confirm that `ubuntu` has been added to the docker user group with: + +`groups | grep docker` + +## Running SWE-bench + +Then start a `screen` session and activate the virtual env: + +`screen` +`source venv/bin/activate` + +Then, from the cognee folder, you can run swe_bench: + +`cd cognee` + +`python evals/eval_swe_bench.py --cognee_off --max_workers=N_CPUS` + +Building the environment images should take roughly 17 minutes. + +If the virtual env wasn't set up correctly for some reason, just run the last few lines of `setup_ubuntu_instance.sh` manually. \ No newline at end of file diff --git a/evals/cloud/setup_ubuntu_instance.sh b/evals/cloud/setup_ubuntu_instance.sh new file mode 100644 index 000000000..e05b761e2 --- /dev/null +++ b/evals/cloud/setup_ubuntu_instance.sh @@ -0,0 +1,33 @@ +sudo apt-get update -y +sudo apt-get install -y ca-certificates curl +sudo install -m 0755 -d /etc/apt/keyrings +sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc +sudo chmod a+r 
/etc/apt/keyrings/docker.asc + +# Add the repository to Apt sources: +echo \ +"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ +sudo tee /etc/apt/sources.list.d/docker.list > /dev/null +sudo apt-get update -y + +sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +sudo docker run hello-world + +sudo apt install -y unzip + +sudo apt-get install -y python3-virtualenv + +sudo add-apt-repository -y ppa:deadsnakes/ppa + +sudo apt update -y + +sudo apt install -y python3.11 + +virtualenv venv --python=python3.11 + +source venv/bin/activate +pip install poetry +poetry install +pip install swebench transformers sentencepiece datasets tiktoken protobuf diff --git a/evals/eval_swe_bench.py b/evals/eval_swe_bench.py index 80fa35623..694d5c8ea 100644 --- a/evals/eval_swe_bench.py +++ b/evals/eval_swe_bench.py @@ -1,6 +1,7 @@ import argparse import json import subprocess +import sys from pathlib import Path from datasets import Dataset @@ -13,6 +14,25 @@ from cognee.api.v1.search import SearchType from cognee.infrastructure.databases.graph import get_graph_engine from cognee.infrastructure.llm.get_llm_client import get_llm_client from cognee.infrastructure.llm.prompts import read_query_prompt +from evals.eval_utils import download_instances + + +def check_install_package(package_name): + """ + Check if a pip package is installed and install it if not. + Returns True if package is/was installed successfully, False otherwise. 
+ """ + try: + __import__(package_name) + return True + except ImportError: + try: + subprocess.check_call( + [sys.executable, "-m", "pip", "install", package_name] + ) + return True + except subprocess.CalledProcessError: + return False from cognee.modules.pipelines import Task, run_tasks from cognee.modules.retrieval.brute_force_triplet_search import \ brute_force_triplet_search @@ -39,7 +59,8 @@ def retrieved_edges_to_string(retrieved_edges): edge_strings.append(edge_str) return "\n".join(edge_strings) -async def generate_patch_with_cognee(instance): +async def generate_patch_with_cognee(instance, llm_client, search_type=SearchType.CHUNKS): + await cognee.prune.prune_data() await cognee.prune.prune_system() @@ -69,7 +90,7 @@ async def generate_patch_with_cognee(instance): await render_graph(None, include_labels = True, include_nodes = True) problem_statement = instance['problem_statement'] - instructions = read_query_prompt("patch_gen_instructions.txt") + instructions = read_query_prompt("patch_gen_kg_instructions.txt") retrieved_edges = await brute_force_triplet_search(problem_statement, top_k = 3) @@ -93,20 +114,20 @@ async def generate_patch_with_cognee(instance): return answer_prediction -async def generate_patch_without_cognee(instance): - problem_statement = instance['problem_statement'] - prompt = instance["text"] +async def generate_patch_without_cognee(instance, llm_client): + instructions = read_query_prompt("patch_gen_instructions.txt") - llm_client = get_llm_client() answer_prediction = await llm_client.acreate_structured_output( - text_input=problem_statement, - system_prompt=prompt, + text_input=instance["text"], + system_prompt=instructions, response_model=str, ) return answer_prediction async def get_preds(dataset, with_cognee=True): + llm_client = get_llm_client() + if with_cognee: model_name = "with_cognee" pred_func = generate_patch_with_cognee @@ -114,14 +135,21 @@ async def get_preds(dataset, with_cognee=True): model_name = "without_cognee" 
pred_func = generate_patch_without_cognee + futures = [ + (instance["instance_id"], pred_func(instance, llm_client)) + for instance in dataset + ] + model_patches = await asyncio.gather(*[x[1] for x in futures]) - for instance in dataset: - await pred_func(instance) + preds = [ + { + "instance_id": instance_id, + "model_patch": model_patch, + "model_name_or_path": model_name, + } + for (instance_id, _), model_patch in zip(futures, model_patches) + ] - preds = [{"instance_id": instance["instance_id"], - "model_patch": await pred_func(instance), - "model_name_or_path": model_name} for instance in dataset] - return preds @@ -129,8 +157,12 @@ async def main(): parser = argparse.ArgumentParser( description="Run LLM predictions on SWE-bench dataset") parser.add_argument('--cognee_off', action='store_true') + parser.add_argument("--max_workers", type=int, required=True) args = parser.parse_args() + for dependency in ["transformers", "sentencepiece", "swebench"]: + check_install_package(dependency) + if args.cognee_off: dataset_name = 'princeton-nlp/SWE-bench_Lite_bm25_13K' dataset = load_swebench_dataset(dataset_name, split='test') @@ -153,12 +185,25 @@ async def main(): with open(predictions_path, "w") as file: json.dump(preds, file) - subprocess.run(["python", "-m", "swebench.harness.run_evaluation", - "--dataset_name", dataset_name, - "--split", "test", - "--predictions_path", predictions_path, - "--max_workers", "1", - "--run_id", "test_run"]) + + subprocess.run( + [ + "python", + "-m", + "swebench.harness.run_evaluation", + "--dataset_name", + dataset_name, + "--split", + "test", + "--predictions_path", + predictions_path, + "--max_workers", + str(args.max_workers), + "--run_id", + "test_run", + ] + ) + if __name__ == "__main__": import asyncio