diff --git a/cognee/infrastructure/llm/prompts/patch_gen_instructions.txt b/cognee/infrastructure/llm/prompts/patch_gen_instructions.txt
new file mode 100644
index 000000000..1553753ab
--- /dev/null
+++ b/cognee/infrastructure/llm/prompts/patch_gen_instructions.txt
@@ -0,0 +1,3 @@
+I need you to solve this issue by looking at the provided knowledge graph and
+generating a single patch file that I can apply directly to this repository using git apply.
+Please respond with a single patch file in the following format.
\ No newline at end of file
diff --git a/evals/eval_swe_bench.py b/evals/eval_swe_bench.py
new file mode 100644
index 000000000..ec93bda07
--- /dev/null
+++ b/evals/eval_swe_bench.py
@@ -0,0 +1,118 @@
+import argparse
+import json
+import subprocess
+from pathlib import Path
+
+from datasets import Dataset
+from swebench.harness.utils import load_swebench_dataset
+from swebench.inference.make_datasets.create_instance import PATCH_EXAMPLE
+
+import cognee
+from cognee.api.v1.cognify.code_graph_pipeline import code_graph_pipeline
+from cognee.api.v1.search import SearchType
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.llm.get_llm_client import get_llm_client
+from cognee.infrastructure.llm.prompts import read_query_prompt
+from evals.eval_utils import download_instances
+
+
+async def generate_patch_with_cognee(instance, search_type=SearchType.CHUNKS):
+    """Generate a patch for one instance with a cognee code graph as context.
+
+    Prunes cognee's data and system state, adds the instance text to a
+    dataset, runs the code graph pipeline on it, and then asks the LLM for a
+    patch. The system prompt is made of the patch-generation instructions,
+    the swebench patch example and the contents of the graph engine's file.
+
+    Args:
+        - instance: mapping providing the "text" and "problem_statement" keys.
+        - search_type: accepted by the signature but not used in this body.
+
+    Returns:
+        Whatever the LLM client's ``acreate_structured_output`` call returns
+        (requested with ``response_model=str``).
+    """
+    # Prune before ingesting -- presumably so that data from a previously
+    # processed instance does not end up in this graph.
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    dataset_name = "SWE_test_data"
+    await cognee.add([instance["text"]], dataset_name)
+    await code_graph_pipeline([dataset_name])
+
+    # Read the file named by the graph engine's ``filename`` attribute; its
+    # raw text is embedded verbatim at the end of the system prompt.
+    graph_engine = await get_graph_engine()
+    with open(graph_engine.filename, "r") as graph_file:
+        graph_str = graph_file.read()
+
+    problem_statement = instance['problem_statement']
+    prompt_sections = (
+        read_query_prompt("patch_gen_instructions.txt"),
+        "",
+        PATCH_EXAMPLE,
+        "",
+        "This is the knowledge graph:",
+        graph_str,
+    )
+    system_prompt = "\n".join(prompt_sections)
+
+    return await get_llm_client().acreate_structured_output(
+        text_input=problem_statement,
+        system_prompt=system_prompt,
+        response_model=str,
+    )
+
+
+async def generate_patch_without_cognee(instance):
+    """Generate a patch for one instance without building a code graph.
+
+    The instance's "text" field is sent as the system prompt and its
+    "problem_statement" field as the input text.
+
+    Args:
+        - instance: mapping providing the "problem_statement" and "text" keys.
+
+    Returns:
+        Whatever the LLM client's ``acreate_structured_output`` call returns
+        (requested with ``response_model=str``).
+    """
+    issue_text = instance['problem_statement']
+    context_text = instance["text"]
+
+    return await get_llm_client().acreate_structured_output(
+        text_input=issue_text,
+        system_prompt=context_text,
+        response_model=str,
+    )
+
+
+async def get_preds(dataset, with_cognee=True):
+    """Produce one prediction record per instance in ``dataset``.
+
+    Args:
+        - dataset: iterable of instances, each providing "instance_id" plus
+          the keys read by the selected generator.
+        - with_cognee: when true use ``generate_patch_with_cognee``,
+          otherwise ``generate_patch_without_cognee``.
+
+    Returns:
+        A list of dicts with the keys "instance_id", "model_patch" and
+        "model_name_or_path", in dataset order.
+    """
+    model_name, pred_func = (
+        ("with_cognee", generate_patch_with_cognee)
+        if with_cognee
+        else ("without_cognee", generate_patch_without_cognee)
+    )
+
+    # Instances are awaited one after another, not concurrently.
+    preds = []
+    for instance in dataset:
+        preds.append({
+            "instance_id": instance["instance_id"],
+            "model_patch": await pred_func(instance),
+            "model_name_or_path": model_name,
+        })
+    return preds
+
+
+async def main():
+ """Generate SWE-bench predictions, then launch the swebench evaluation module.
+
+ Command line: ``--cognee_off`` (store_true) selects the generator without
+ cognee and a different dataset / predictions file.
+
+ NOTE(review): block nesting is not visible in this diff (indentation is
+ collapsed). The comments below assume the ``else`` after the first
+ ``json.dump`` pairs with ``if args.cognee_off`` and that ``subprocess.run``
+ executes after either branch -- confirm against the real file.
+ """
+ parser = argparse.ArgumentParser(
+ description="Run LLM predictions on SWE-bench dataset")
+ parser.add_argument('--cognee_off', action='store_true')
+ args = parser.parse_args()
+
+ # --cognee_off: load the 'test' split of the bm25_13K dataset and write
+ # predictions to preds_nocognee.json.
+ if args.cognee_off:
+ dataset_name = 'princeton-nlp/SWE-bench_Lite_bm25_13K'
+ dataset = load_swebench_dataset(dataset_name, split='test')
+ predictions_path = "preds_nocognee.json"
+ # Predictions are generated only when the predictions file is absent.
+ if not Path(predictions_path).exists():
+ preds = await get_preds(dataset, with_cognee=False)
+ with open(predictions_path, "w") as file:
+ json.dump(preds, file)
+ else:
+ dataset_name = 'princeton-nlp/SWE-bench_Lite'
+ # [:1] keeps only the first instance of the 'test' split.
+ swe_dataset = load_swebench_dataset(
+ dataset_name, split='test')[:1]
+ # Load the dataset from SWE-bench_testsample when that path exists;
+ # otherwise build it with download_instances, which is given the same path.
+ filepath = Path("SWE-bench_testsample")
+ if filepath.exists():
+ dataset = Dataset.load_from_disk(filepath)
+ else:
+ dataset = download_instances(swe_dataset, filepath)
+ predictions_path = "preds.json"
+ # NOTE(review): unlike the --cognee_off path there is no exists() check
+ # here, so preds.json appears to be rewritten on every run -- confirm intent.
+ preds = await get_preds(dataset, with_cognee=not args.cognee_off)
+ with open(predictions_path, "w") as file:
+ json.dump(preds, file)
+
+ # Run `python -m swebench.harness.run_evaluation` on the predictions file.
+ # NOTE(review): "python" is looked up on PATH and may not be the interpreter
+ # running this script (sys.executable would pin it); the call's result is
+ # not inspected, so a failed evaluation is not reported here.
+ subprocess.run(["python", "-m", "swebench.harness.run_evaluation",
+ "--dataset_name", dataset_name,
+ "--split", "test",
+ "--predictions_path", predictions_path,
+ "--max_workers", "1",
+ "--run_id", "test_run"])
+
+# Script entry point: run the async main() via asyncio.run with debug mode on.
+if __name__ == "__main__":
+ import asyncio
+ asyncio.run(main(), debug=True)
diff --git a/evals/eval_utils.py b/evals/eval_utils.py
new file mode 100644
index 000000000..e95a84cec
--- /dev/null
+++ b/evals/eval_utils.py
@@ -0,0 +1,103 @@
+import os
+from copy import deepcopy
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+from datasets import Dataset
+from swebench.inference.make_datasets.create_instance import make_code_text
+from swebench.inference.make_datasets.utils import (AutoContextManager,
+ ingest_directory_contents)
+from tqdm.auto import tqdm
+
+
+def ingest_files(filenames):
+    """Read each file in ``filenames`` and return its text keyed by filename.
+
+    Files are decoded as UTF-8 explicitly so the result does not depend on
+    the locale's default encoding of the machine running the evaluation.
+
+    Args:
+        - filenames: iterable of paths accepted by ``open``.
+
+    Returns:
+        dict mapping every given filename to the full text of that file. A
+        filename listed more than once yields a single entry.
+
+    Raises:
+        OSError: if a file cannot be opened.
+        UnicodeDecodeError: if a file is not valid UTF-8.
+    """
+    files_dict = {}
+    for filename in filenames:
+        with open(filename, encoding="utf-8") as f:
+            files_dict[filename] = f.read()
+    return files_dict
+
+
+def ingest_repos(input_instances):
+ """Attach README texts and repository file contents to every instance.
+
+ Each instance is entered into an ``AutoContextManager`` rooted in a
+ temporary directory; the README files it reports are read with
+ ``ingest_files`` and stored under "readmes", and the result of
+ ``ingest_directory_contents(cm.repo_path)`` is stored under "file_contents".
+ The instance dicts are modified in place.
+
+ Args:
+ - input_instances: mapping whose values are the instance dicts to fill in.
+
+ Returns:
+ The same ``input_instances`` object that was passed in.
+ """
+ # Remember the starting directory so it can be restored after each instance.
+ orig_dir = os.getcwd()
+ # Temporary root lives under /scratch when that path exists, else under /tmp.
+ with TemporaryDirectory(
+ dir="/scratch" if os.path.exists("/scratch") else "/tmp"
+ ) as root_dir:
+ for instance in tqdm(
+ input_instances.values(),
+ total=len(input_instances),
+ desc="Downloading repos on specific commits",
+ ):
+ try:
+ # presumably fetches the instance's repository under root_dir (see the
+ # progress label above) -- confirm against swebench's AutoContextManager
+ with AutoContextManager(
+ instance, root_dir
+ ) as cm:
+ readmes = cm.get_readme_files()
+ instance["readmes"] = ingest_files(readmes)
+ instance["file_contents"] = ingest_directory_contents(
+ cm.repo_path
+ )
+ finally:
+ # Always go back to the starting directory: if AutoContextManager fails
+ # to exit properly, future exits will return the wrong directory.
+ os.chdir(orig_dir)
+
+ return input_instances
+
+
+def extract_fields(instance):
+    """Return a copy of ``instance`` with "text" and "patch" (re)computed.
+
+    "text" joins the ``make_code_text`` rendering of the READMEs and of the
+    file contents (the latter without line numbers) with a newline, strips
+    the result and appends two newlines. "patch" is the instance's patch
+    wrapped in one leading and one trailing newline.
+
+    Args:
+        - instance: mapping providing "readmes", "file_contents" and "patch".
+
+    Returns:
+        A new dict with every key of ``instance`` plus the "text" and "patch"
+        values described above.
+    """
+    readme_section = make_code_text(instance["readmes"])
+    code_section = make_code_text(
+        instance["file_contents"], add_line_numbers=False)
+
+    combined = "\n".join((readme_section, code_section)).strip() + "\n\n"
+    padded_patch = "\n".join(("", instance["patch"], ""))
+    return {**instance, "text": combined, "patch": padded_patch}
+
+
+def create_dataset(input_instances):
+    """Build a ``Dataset`` with a fixed column set from the given instances.
+
+    Every instance is first passed through ``extract_fields``. A column that
+    the resulting record does not contain is filled with an empty string.
+
+    Args:
+        - input_instances: mapping whose values are instance dicts accepted
+          by ``extract_fields``.
+
+    Returns:
+        The ``Dataset`` created by ``Dataset.from_dict`` with one row per
+        instance.
+    """
+    columns = (
+        "instance_id",
+        "text",
+        "repo",
+        "base_commit",
+        "problem_statement",
+        "hints_text",
+        "created_at",
+        "patch",
+        "test_patch",
+        "version",
+        "FAIL_TO_PASS",
+        "PASS_TO_PASS",
+        "environment_setup_commit",
+    )
+
+    rows = [extract_fields(instance) for instance in input_instances.values()]
+    # Column-oriented layout: one list per column, aligned by row order.
+    data_table = {
+        column: [row.get(column, "") for row in rows]
+        for column in columns
+    }
+    return Dataset.from_dict(data_table)
+
+
+def download_instances(
+    input_data,
+    path=Path("SWE-bench_testsample"),
+    verbose=False,
+):
+    """Build the text dataset for the given instances and save it to disk.
+
+    The instances are keyed by "instance_id", deep-copied, enriched by
+    ``ingest_repos``, converted with ``create_dataset`` and written to ``path``.
+
+    Args:
+        - input_data: iterable of unprocessed instances, each providing an
+          "instance_id" key.
+        - path: location passed to the dataset's ``save_to_disk``.
+        - verbose: currently not used by this function.
+
+    Returns:
+        The dataset returned by ``create_dataset``.
+    """
+    instances_by_id = {}
+    for raw_instance in input_data:
+        instances_by_id[raw_instance["instance_id"]] = raw_instance
+
+    # ingest_repos writes into the instance dicts it receives, so it is given
+    # a deep copy and the caller's objects stay untouched.
+    enriched = ingest_repos(deepcopy(instances_by_id))
+
+    dataset = create_dataset(enriched)
+    dataset.save_to_disk(path)
+    return dataset