merge changes from code-graph

Rita Aleksziev 2024-11-29 12:16:36 +01:00
commit 4da1657140
5 changed files with 166 additions and 22 deletions

View file

@@ -1,3 +1,2 @@
-I need you to solve this issue by looking at the provided edges retrieved from a knowledge graph and
-generate a single patch file that I can apply directly to this repository using git apply.
+I need you to solve this issue by generating a single patch file that I can apply directly to this repository using git apply.
Please respond with a single patch file in the following format.

View file

@@ -0,0 +1,3 @@
I need you to solve this issue by looking at the provided knowledge graph and
generating a single patch file that I can apply directly to this repository using git apply.
Please respond with a single patch file in the following format.
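
Both prompts ask for a patch that `git apply` accepts. A hedged sketch of how such a model-generated patch could be validated before use (hypothetical helper, not part of this commit):

```python
# Hypothetical helper (not part of this commit): dry-run a model-generated
# patch with `git apply --check` before applying it for real.
import subprocess
import tempfile

def patch_applies(patch_text: str, repo_dir: str) -> bool:
    with tempfile.NamedTemporaryFile("w", suffix=".patch", delete=False) as f:
        f.write(patch_text)
        patch_path = f.name
    # --check validates the patch without touching the working tree.
    result = subprocess.run(["git", "apply", "--check", patch_path], cwd=repo_dir)
    return result.returncode == 0
```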

evals/EC2_README.md
View file

@@ -0,0 +1,64 @@
## Creating the EC2 Instance
Create an EC2 instance with the
`Ubuntu` image.
Many instance types will work; we used:
`m7a.2xlarge` # more than 8 parallel processes doesn't seem to speed up the overall run, possibly due to Docker's own parallelism
DON'T FORGET TO ADD
`500 GB storage`
or the evaluation run will run out of disk space.
Add a key pair for login, making sure you have access to the corresponding key file (`*.pem`).
## Accessing your instance and setup
To SSH into the instance, save your key pair file (`*.pem`) to an appropriate location, such as `~/.aws`. After launching the instance, open the Instance Summary and retrieve the "Public IPv4 DNS" address. Then run
`ssh -i PATH_TO_KEY ubuntu@IPv4ADDRESS`
to gain command line access to the instance.
To copy your current state of cognee, go to the folder that contains `cognee` on your local machine, zip it to `cognee.zip`, and copy it over:
`zip -r cognee.zip cognee`
`scp -i PATH_TO_KEY cognee.zip ubuntu@IPv4ADDRESS:cognee.zip`
Then unzip `cognee.zip` in your SSH session:
`sudo apt install unzip`
`unzip cognee.zip`
Then run:
`cd cognee`
`source evals/cloud/setup_ubuntu_instance.sh`
`sudo usermod -aG docker $USER`
Disconnect and reconnect so the group change takes effect.
Confirm that `ubuntu` has been added to the `docker` group with
`groups | grep docker`
## Running SWE-bench
Then enter a `screen` session, and from the `cognee` folder activate the virtual env:
`screen`
`cd cognee`
`source venv/bin/activate`
Then you can run the SWE-bench evaluation:
`python evals/eval_swe_bench.py --cognee_off --max_workers=N_CPUS`
Building the environment images should take roughly 17 minutes.
For choosing `N_CPUS`, see the sketch below.
If the virtual env wasn't set up correctly for some reason, just run the last few lines of `setup_ubuntu_instance.sh` manually.
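
As a rough guide for `N_CPUS`, a sketch assuming the 8-worker ceiling noted above holds:

```python
# Illustrative sketch (not part of the commit): pick a value for --max_workers.
# We saw no speedup beyond 8 parallel processes, so cap there.
import os

n_cpus = os.cpu_count() or 1
print(min(n_cpus, 8))
```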

View file

@@ -0,0 +1,33 @@
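# Install Docker Engine: add Docker's official GPG key and apt repository, then install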
sudo apt-get update -y
sudo apt-get install -y ca-certificates curl
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc
# Add the repository to Apt sources:
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update -y
sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
sudo docker run hello-world
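# Non-Docker dependencies: unzip, virtualenv, and Python 3.11 from the deadsnakes PPA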
sudo apt install -y unzip
sudo apt-get install -y python3-virtualenv
sudo add-apt-repository -y ppa:deadsnakes/ppa
sudo apt update -y
sudo apt install -y python3.11
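# Create a Python 3.11 virtualenv and install cognee's dependencies (run from the cognee checkout)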
virtualenv venv --python=python3.11
source venv/bin/activate
pip install poetry
poetry install
pip install swebench transformers sentencepiece datasets tiktoken protobuf

View file

@@ -1,6 +1,7 @@
import argparse
import json
import subprocess
+import sys
from pathlib import Path
from datasets import Dataset
@@ -13,6 +14,25 @@ from cognee.api.v1.search import SearchType
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.prompts import read_query_prompt
+from evals.eval_utils import download_instances
+def check_install_package(package_name):
+    """
+    Check if a pip package is installed and install it if not.
+    Returns True if package is/was installed successfully, False otherwise.
+    """
+    try:
+        __import__(package_name)
+        return True
+    except ImportError:
+        try:
+            subprocess.check_call(
+                [sys.executable, "-m", "pip", "install", package_name]
+            )
+            return True
+        except subprocess.CalledProcessError:
+            return False
from cognee.modules.pipelines import Task, run_tasks
from cognee.modules.retrieval.brute_force_triplet_search import \
    brute_force_triplet_search
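
A quick usage sketch for this helper (illustrative only; the commit itself calls it from `main()`):

```python
# check_install_package imports the package if available, otherwise shells
# out to pip; False means even the install attempt failed.
if not check_install_package("swebench"):
    raise RuntimeError("swebench is required but could not be installed")
```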
@@ -39,7 +59,8 @@ def retrieved_edges_to_string(retrieved_edges):
        edge_strings.append(edge_str)
    return "\n".join(edge_strings)
-async def generate_patch_with_cognee(instance):
+async def generate_patch_with_cognee(instance, llm_client, search_type=SearchType.CHUNKS):
    await cognee.prune.prune_data()
    await cognee.prune.prune_system()
@@ -69,7 +90,7 @@ async def generate_patch_with_cognee(instance):
    await render_graph(None, include_labels = True, include_nodes = True)
    problem_statement = instance['problem_statement']
-    instructions = read_query_prompt("patch_gen_instructions.txt")
+    instructions = read_query_prompt("patch_gen_kg_instructions.txt")
    retrieved_edges = await brute_force_triplet_search(problem_statement, top_k = 3)
@@ -93,20 +114,20 @@ async def generate_patch_with_cognee(instance):
    return answer_prediction
-async def generate_patch_without_cognee(instance):
-    problem_statement = instance['problem_statement']
-    prompt = instance["text"]
+async def generate_patch_without_cognee(instance, llm_client):
+    instructions = read_query_prompt("patch_gen_instructions.txt")
-    llm_client = get_llm_client()
    answer_prediction = await llm_client.acreate_structured_output(
-        text_input=problem_statement,
-        system_prompt=prompt,
+        text_input=instance["text"],
+        system_prompt=instructions,
        response_model=str,
    )
    return answer_prediction
async def get_preds(dataset, with_cognee=True):
+    llm_client = get_llm_client()
    if with_cognee:
        model_name = "with_cognee"
        pred_func = generate_patch_with_cognee
@@ -114,14 +135,21 @@ async def get_preds(dataset, with_cognee=True):
        model_name = "without_cognee"
        pred_func = generate_patch_without_cognee
+    futures = [
+        (instance["instance_id"], pred_func(instance, llm_client))
+        for instance in dataset
+    ]
+    model_patches = await asyncio.gather(*[x[1] for x in futures])
-    for instance in dataset:
-        await pred_func(instance)
+    preds = [
+        {
+            "instance_id": instance_id,
+            "model_patch": model_patch,
+            "model_name_or_path": model_name,
+        }
+        for (instance_id, _), model_patch in zip(futures, model_patches)
+    ]
-    preds = [{"instance_id": instance["instance_id"],
-              "model_patch": await pred_func(instance),
-              "model_name_or_path": model_name} for instance in dataset]
    return preds
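
The rewritten `get_preds` schedules all predictions concurrently instead of awaiting them one by one. A minimal standalone sketch of the same `asyncio.gather` fan-out pattern (hypothetical names, not from the commit):

```python
import asyncio

async def fake_pred(instance_id: str) -> str:
    await asyncio.sleep(0.1)  # stand-in for an LLM call
    return f"patch-for-{instance_id}"

async def demo():
    ids = ["id-1", "id-2", "id-3"]
    # Schedule all predictions at once and await them together.
    patches = await asyncio.gather(*(fake_pred(i) for i in ids))
    return list(zip(ids, patches))

print(asyncio.run(demo()))
```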
@@ -129,8 +157,12 @@ async def main():
    parser = argparse.ArgumentParser(
        description="Run LLM predictions on SWE-bench dataset")
    parser.add_argument('--cognee_off', action='store_true')
+    parser.add_argument("--max_workers", type=int, required=True)
    args = parser.parse_args()
+    for dependency in ["transformers", "sentencepiece", "swebench"]:
+        check_install_package(dependency)
    if args.cognee_off:
        dataset_name = 'princeton-nlp/SWE-bench_Lite_bm25_13K'
        dataset = load_swebench_dataset(dataset_name, split='test')
@@ -153,12 +185,25 @@ async def main():
    with open(predictions_path, "w") as file:
        json.dump(preds, file)
-    subprocess.run(["python", "-m", "swebench.harness.run_evaluation",
-                    "--dataset_name", dataset_name,
-                    "--split", "test",
-                    "--predictions_path", predictions_path,
-                    "--max_workers", "1",
-                    "--run_id", "test_run"])
+    subprocess.run(
+        [
+            "python",
+            "-m",
+            "swebench.harness.run_evaluation",
+            "--dataset_name",
+            dataset_name,
+            "--split",
+            "test",
+            "--predictions_path",
+            predictions_path,
+            "--max_workers",
+            str(args.max_workers),
+            "--run_id",
+            "test_run",
+        ]
+    )
if __name__ == "__main__":
    import asyncio