From 1bfa3a0ea317c79f36fe817adcf96abf0a24f88e Mon Sep 17 00:00:00 2001 From: Leon Luithlen Date: Fri, 29 Nov 2024 11:30:30 +0100 Subject: [PATCH] Rebase onto code-graph --- evals/EC2_README.md | 34 +++++++ evals/cloud/setup_ubuntu_instance.sh | 43 +++++++++ evals/eval_swe_bench.py | 127 ++++++++++++++++++--------- 3 files changed, 163 insertions(+), 41 deletions(-) create mode 100644 evals/EC2_README.md create mode 100644 evals/cloud/setup_ubuntu_instance.sh diff --git a/evals/EC2_README.md b/evals/EC2_README.md new file mode 100644 index 000000000..50a92bc27 --- /dev/null +++ b/evals/EC2_README.md @@ -0,0 +1,34 @@ +Create an EC2 Instance with the + +`Ubuntu Image` + +Many instance types will work; we used: + +`m7a.2xlarge` # using more than 8 parallel processes doesn't seem to speed up the overall run; this may be related to Docker parallelism. + +DON'T FORGET TO ADD + +`500 GB storage` + +Otherwise the evaluation run will run out of disk space. + +-------------------------------------------------------- + +Then ssh into the instance and run + +source evals/cloud/setup_ubuntu_instance.sh + +sudo usermod -aG docker $USER + +Then disconnect and reconnect. 
+ +Then enter a `screen` and activate the virtual env + +screen +source venv/bin/activate + +then, from cognee, you can run swe_bench: + +python evals/eval_swe_bench.py --cognee_off --max_workers=N_CPUS + +Building the environment images takes roughly 17 minutes \ No newline at end of file diff --git a/evals/cloud/setup_ubuntu_instance.sh b/evals/cloud/setup_ubuntu_instance.sh new file mode 100644 index 000000000..e5386c372 --- /dev/null +++ b/evals/cloud/setup_ubuntu_instance.sh @@ -0,0 +1,43 @@ + +sudo apt-get update +sudo apt-get install ca-certificates curl +sudo install -m 0755 -d /etc/apt/keyrings +sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc +sudo chmod a+r /etc/apt/keyrings/docker.asc + +# Add the repository to Apt sources: +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null +sudo apt-get update + +sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +sudo docker run hello-world + +sudo apt install unzip + +sudo apt-get install python3-virtualenv + +sudo add-apt-repository ppa:deadsnakes/ppa +sudo apt update + +sudo apt install python3.11 + +virtualenv venv --python=python3.11 + +source venv/bin/activate + +pip install poetry + +poetry install + +pip install swebench transformers sentencepiece + +groups | grep docker + +python evals/eval_swe_bench.py --cognee_off --max_workers="$(nproc)" + +sudo usermod -aG docker $USER + diff --git a/evals/eval_swe_bench.py b/evals/eval_swe_bench.py index 1dd0e58ab..5cbea58ee 100644 --- a/evals/eval_swe_bench.py +++ b/evals/eval_swe_bench.py @@ -1,6 +1,7 @@ import argparse import json import subprocess +import sys from pathlib import Path from datasets import Dataset @@ -29,7 +30,28 @@ from evals.eval_utils import ingest_repos from evals.eval_utils 
import download_github_repo from evals.eval_utils import delete_repo -async def generate_patch_with_cognee(instance): + +def check_install_package(package_name): + """ + Check whether `package_name` can be imported and, if not, install it with pip for the current interpreter (the same name is used for the import and for pip). + Returns True if the import or the pip install succeeded, False if pip exited with an error. + """ + try: + __import__(package_name) + return True + except ImportError: + try: + subprocess.check_call( + [sys.executable, "-m", "pip", "install", package_name] + ) + return True + except subprocess.CalledProcessError: + return False + + + +async def generate_patch_with_cognee(instance, search_type=SearchType.CHUNKS): + await cognee.prune.prune_data() await cognee.prune.prune_system() @@ -59,23 +81,22 @@ async def generate_patch_with_cognee(instance): await render_graph(None, include_labels = True, include_nodes = True) - problem_statement = instance['problem_statement'] + problem_statement = instance["problem_statement"] instructions = read_query_prompt("patch_gen_instructions.txt") graph_str = 'HERE WE SHOULD PASS THE TRIPLETS FROM GRAPHRAG' - prompt = "\n".join([ - instructions, - "", - PATCH_EXAMPLE, - "", - "This is the knowledge graph:", - graph_str - ]) + prompt = "\n".join( + [ + instructions, + "", + PATCH_EXAMPLE, + "", + "This is the knowledge graph:", + graph_str, + ] + ) - return 0 - - ''' :TODO: We have to find out how do we do the generation llm_client = get_llm_client() answer_prediction = await llm_client.acreate_structured_output( text_input=problem_statement, @@ -84,13 +105,11 @@ async def generate_patch_with_cognee(instance): ) return answer_prediction - ''' -async def generate_patch_without_cognee(instance): - problem_statement = instance['problem_statement'] +async def generate_patch_without_cognee(instance, llm_client): + problem_statement = instance["problem_statement"] prompt = instance["text"] - llm_client = get_llm_client() answer_prediction = await llm_client.acreate_structured_output( text_input=problem_statement, system_prompt=prompt, 
@@ -100,43 +119,56 @@ async def generate_patch_without_cognee(instance): async def get_preds(dataset, with_cognee=True): + llm_client = get_llm_client() + if with_cognee: model_name = "with_cognee" - pred_func = generate_patch_with_cognee + futures = [ + (instance["instance_id"], generate_patch_with_cognee(instance)) + for instance in dataset + ] else: model_name = "without_cognee" - pred_func = generate_patch_without_cognee + futures = [ + (instance["instance_id"], generate_patch_without_cognee(instance, llm_client)) + for instance in dataset + ] + model_patches = await asyncio.gather(*[x[1] for x in futures]) + preds = [ + { + "instance_id": instance_id, + "model_patch": model_patch, + "model_name_or_path": model_name, + } + for (instance_id, _), model_patch in zip(futures, model_patches) + ] - for instance in dataset: - await pred_func(instance) - - ''' - preds = [{"instance_id": instance["instance_id"], - "model_patch": await pred_func(instance), - "model_name_or_path": model_name} for instance in dataset] - ''' - return 0 + return preds async def main(): parser = argparse.ArgumentParser( - description="Run LLM predictions on SWE-bench dataset") - parser.add_argument('--cognee_off', action='store_true') + description="Run LLM predictions on SWE-bench dataset" + ) + parser.add_argument("--cognee_off", action="store_true") + parser.add_argument("--max_workers", type=int, default=1, help="number of parallel workers passed to the swebench harness (default: 1)") args = parser.parse_args() + for dependency in ["transformers", "sentencepiece", "swebench"]: + check_install_package(dependency) + if args.cognee_off: - dataset_name = 'princeton-nlp/SWE-bench_Lite_bm25_13K' - dataset = load_swebench_dataset(dataset_name, split='test') + dataset_name = "princeton-nlp/SWE-bench_Lite_bm25_13K" + dataset = load_swebench_dataset(dataset_name, split="test") predictions_path = "preds_nocognee.json" if not Path(predictions_path).exists(): preds = await get_preds(dataset, with_cognee=False) with open(predictions_path, "w") as file: json.dump(preds, file) 
else: - dataset_name = 'princeton-nlp/SWE-bench_Lite' - swe_dataset = load_swebench_dataset( - dataset_name, split='test')[:1] + dataset_name = "princeton-nlp/SWE-bench_Lite" + swe_dataset = load_swebench_dataset(dataset_name, split="test")[:1] filepath = Path("SWE-bench_testsample") if filepath.exists(): dataset = Dataset.load_from_disk(filepath) @@ -147,12 +179,25 @@ async def main(): with open(predictions_path, "w") as file: json.dump(preds, file) - subprocess.run(["python", "-m", "swebench.harness.run_evaluation", - "--dataset_name", dataset_name, - "--split", "test", - "--predictions_path", predictions_path, - "--max_workers", "1", - "--run_id", "test_run"]) + + subprocess.run( + [ + sys.executable,  # run the harness with the interpreter running this script + "-m", + "swebench.harness.run_evaluation", + "--dataset_name", + dataset_name, + "--split", + "test", + "--predictions_path", + predictions_path, + "--max_workers", + str(args.max_workers), + "--run_id", + "test_run", + ] + ) + if __name__ == "__main__": import asyncio