Connect pipeline to benchmark (#42)
evals/eval_swe_bench runs the code graph pipeline, adds retrieval at the end, and connects the whole thing to SWE-bench. Some unnecessary utility functions were removed. Note: as an example, the pipeline is run on a local "graphrag" folder, due to bugs in the pipeline.
commit 57754b3ca0
5 changed files with 57 additions and 153 deletions
.gitignore (vendored): 2 changes

@@ -14,7 +14,7 @@ __pycache__/
 *$py.class

 full_run.ipynb
-evals/
+logs/

 # C extensions
 *.so
cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt

@@ -1,3 +1,3 @@
-I need you to solve this issue by looking at the provided knowledge graph and
-generating a single patch file that I can apply directly to this repository using git apply.
+I need you to solve this issue by looking at the provided edges retrieved from a knowledge graph and
+generate a single patch file that I can apply directly to this repository using git apply.
 Please respond with a single patch file in the following format.
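The format the prompt refers to is swebench's PATCH_EXAMPLE, which is not part of this diff. As an illustration only, a patch that git apply accepts looks roughly like this (hypothetical file and function names):

--- a/example/module.py
+++ b/example/module.py
@@ -10,3 +10,3 @@
 def handler(event):
-    result = process(event)
+    result = process(event, strict=True)
     return result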
cognee/modules/retrieval/brute_force_triplet_search.py

@@ -1,13 +1,15 @@
 import asyncio
 import logging
 from typing import List
-from cognee.modules.users.models import User
-from cognee.modules.users.methods import get_default_user
-from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
-from cognee.infrastructure.databases.vector import get_vector_engine
+
 from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
+from cognee.modules.users.methods import get_default_user
+from cognee.modules.users.models import User
 from cognee.shared.utils import send_telemetry


 def format_triplets(edges):
     print("\n\n\n")
     def filter_attributes(obj, attributes):

@@ -48,16 +50,14 @@ def format_triplets(edges):
     return "".join(triplets)


-async def brute_force_triplet_search(query: str, user: User = None, top_k = 5) -> list:
+async def brute_force_triplet_search(query: str, user: User = None, top_k = 5, collections = None) -> list:
     if user is None:
         user = await get_default_user()

     if user is None:
         raise PermissionError("No user found in the system. Please create a user.")

-    retrieved_results = await brute_force_search(query, user, top_k)
+    retrieved_results = await brute_force_search(query, user, top_k, collections=collections)

     return retrieved_results
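The new collections argument narrows the vector search to named collections instead of searching everything. A minimal usage sketch, assuming the collections were populated by a prior indexing run (the two collection names are the ones the eval script passes below):

import asyncio

from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search


async def demo():
    # Omitting user falls back to get_default_user(); collections=None
    # would keep the old search-all behavior.
    edges = await brute_force_triplet_search(
        "Where are file dependencies extracted?",
        top_k=3,
        collections=["data_point_source_code", "data_point_text"],
    )
    for edge in edges:
        print(edge.attributes["relationship_type"])


if __name__ == "__main__":
    asyncio.run(demo())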
evals/eval_swe_bench.py

@@ -4,28 +4,24 @@ import subprocess
 import sys
 from pathlib import Path

-from datasets import Dataset
 from swebench.harness.utils import load_swebench_dataset
 from swebench.inference.make_datasets.create_instance import PATCH_EXAMPLE

 import cognee
-from cognee.shared.data_models import SummarizedContent
-from cognee.shared.utils import render_graph
-from cognee.tasks.repo_processor import (
-    enrich_dependency_graph,
-    expand_dependency_graph,
-    get_repo_file_dependencies,
-)
-from cognee.tasks.storage import add_data_points
-from cognee.tasks.summarization import summarize_code
-from cognee.modules.pipelines import Task, run_tasks
-from cognee.api.v1.cognify.code_graph_pipeline import code_graph_pipeline
 from cognee.api.v1.search import SearchType
-from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.infrastructure.llm.prompts import read_query_prompt
-from evals.eval_utils import download_instances
+from cognee.modules.pipelines import Task, run_tasks
+from cognee.modules.retrieval.brute_force_triplet_search import \
+    brute_force_triplet_search
+from cognee.shared.data_models import SummarizedContent
+from cognee.shared.utils import render_graph
+from cognee.tasks.repo_processor import (enrich_dependency_graph,
+                                         expand_dependency_graph,
+                                         get_repo_file_dependencies)
+from cognee.tasks.storage import add_data_points
+from cognee.tasks.summarization import summarize_code
+from evals.eval_utils import download_github_repo, retrieved_edges_to_string


 def check_install_package(package_name):
@@ -45,30 +41,27 @@ def check_install_package(package_name):
     except subprocess.CalledProcessError:
         return False


 async def generate_patch_with_cognee(instance, llm_client, search_type=SearchType.CHUNKS):

     await cognee.prune.prune_data()
     await cognee.prune.prune_system()

-    #dataset_name = "SWE_test_data"
-    #await cognee.add('', dataset_name = dataset_name)
-
     # repo_path = download_github_repo(instance, '../RAW_GIT_REPOS')
     repo_path = '/Users/borisarzentar/Projects/graphrag'

     tasks = [
         Task(get_repo_file_dependencies),
         Task(add_data_points, task_config = { "batch_size": 50 }),
         Task(enrich_dependency_graph, task_config = { "batch_size": 50 }),
         Task(expand_dependency_graph, task_config = { "batch_size": 50 }),
         Task(add_data_points, task_config = { "batch_size": 50 }),
-        # Task(summarize_code, summarization_model = SummarizedContent),
+        Task(summarize_code, summarization_model = SummarizedContent),
     ]

     pipeline = run_tasks(tasks, repo_path, "cognify_code_pipeline")

     async for result in pipeline:
         print(result)
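The hard-coded repo_path is the workaround called out in the commit message; the commented-out download_github_repo call shows the intended source of the checkout. As a standalone sketch, the indexing step amounts to this (assuming the cognee APIs behave exactly as used above; the path is a placeholder, and the task list is abbreviated):

import asyncio

import cognee
from cognee.modules.pipelines import Task, run_tasks
from cognee.tasks.repo_processor import get_repo_file_dependencies
from cognee.tasks.storage import add_data_points


async def build_code_graph(repo_path):
    # Clear previously stored data and system state before indexing.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system()

    tasks = [
        Task(get_repo_file_dependencies),
        Task(add_data_points, task_config={"batch_size": 50}),
    ]

    # run_tasks yields results as the pipeline progresses.
    async for result in run_tasks(tasks, repo_path, "cognify_code_pipeline"):
        print(result)


if __name__ == "__main__":
    asyncio.run(build_code_graph("/path/to/a/local/repo"))  # placeholder path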
@@ -79,19 +72,20 @@ async def generate_patch_with_cognee(instance, llm_client, search_type=SearchTyp
     problem_statement = instance['problem_statement']
     instructions = read_query_prompt("patch_gen_kg_instructions.txt")

-    graph_str = 'HERE WE SHOULD PASS THE TRIPLETS FROM GRAPHRAG'
+    retrieved_edges = await brute_force_triplet_search(problem_statement, top_k = 3, collections = ["data_point_source_code", "data_point_text"])
+
+    retrieved_edges_str = retrieved_edges_to_string(retrieved_edges)

-    prompt = "\n".join(
-        [
-            problem_statement,
-            "<patch>",
-            PATCH_EXAMPLE,
-            "</patch>",
-            "This is the knowledge graph:",
-            graph_str,
-        ]
-    )
+    prompt = "\n".join([
+        problem_statement,
+        "<patch>",
+        PATCH_EXAMPLE,
+        "</patch>",
+        "These are the retrieved edges:",
+        retrieved_edges_str
+    ])

+    llm_client = get_llm_client()
     answer_prediction = await llm_client.acreate_structured_output(
         text_input=prompt,
         system_prompt=instructions,
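After this change the model sees the problem statement, a worked patch example, and the flattened edges, in that order. Roughly (illustrative stand-in values; the real PATCH_EXAMPLE and edge text are much longer):

problem_statement = "TypeError in parse() when tokens is empty ..."
patch_example = "--- a/example.py\n+++ b/example.py\n..."  # stands in for PATCH_EXAMPLE
edges_str = (
    "Node(id: 2, type: file, description: lexer.py) contains "
    "Node(id: 1, type: function, description: def parse(tokens): ...)"
)

prompt = "\n".join([
    problem_statement,
    "<patch>",
    patch_example,
    "</patch>",
    "These are the retrieved edges:",
    edges_str,
])
print(prompt)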
@@ -162,13 +156,8 @@ async def main():
     dataset_name = 'princeton-nlp/SWE-bench_Lite'
     swe_dataset = load_swebench_dataset(
         dataset_name, split='test')[:1]
-    filepath = Path("SWE-bench_testsample")
-    if filepath.exists():
-        dataset = Dataset.load_from_disk(filepath)
-    else:
-        dataset = download_instances(swe_dataset, filepath)
     predictions_path = "preds.json"
-    preds = await get_preds(dataset, with_cognee=not args.cognee_off)
+    preds = await get_preds(swe_dataset, with_cognee=not args.cognee_off)
     with open(predictions_path, "w") as file:
         json.dump(preds, file)
evals/eval_utils.py

@@ -1,107 +1,7 @@
 import os
-from copy import deepcopy
-from pathlib import Path
-from tempfile import TemporaryDirectory
-
-from datasets import Dataset
-from swebench.inference.make_datasets.create_instance import make_code_text
-from swebench.inference.make_datasets.utils import (AutoContextManager,
-                                                    ingest_directory_contents)
-from tqdm.auto import tqdm
-from git import Repo
 import shutil
+
+from git import Repo

-def ingest_files(filenames):
-    files_dict = dict()
-    for filename in filenames:
-        with open(filename) as f:
-            content = f.read()
-        files_dict[filename] = content
-    return files_dict
-
-
-def ingest_repos(input_instances):
-    orig_dir = os.getcwd()
-    with TemporaryDirectory(
-        dir="/scratch" if os.path.exists("/scratch") else "/tmp"
-    ) as root_dir:
-        for instance in tqdm(
-            input_instances.values(),
-            total=len(input_instances),
-            desc="Downloading repos on specific commits",
-        ):
-            try:
-                with AutoContextManager(
-                    instance, root_dir
-                ) as cm:
-                    readmes = cm.get_readme_files()
-                    instance["readmes"] = ingest_files(readmes)
-                    instance["file_contents"] = ingest_directory_contents(
-                        cm.repo_path
-                    )
-            finally:
-                # if AutoContextManager fails to exit properly future exits will return the wrong directory
-                os.chdir(orig_dir)
-
-    return input_instances
-
-
-def extract_fields(instance):
-    readmes_text = make_code_text(instance["readmes"])
-    code_text = make_code_text(
-        instance["file_contents"], add_line_numbers=False)
-
-    text_inputs = "\n".join([readmes_text, code_text])
-    text_inputs = text_inputs.strip() + "\n\n"
-    # text_inputs = code_text
-    patch = "\n".join(["<patch>", instance["patch"], "</patch>"])
-    return {**instance, "text": text_inputs, "patch": patch}
-
-
-def create_dataset(input_instances):
-    columns = [
-        "instance_id",
-        "text",
-        "repo",
-        "base_commit",
-        "problem_statement",
-        "hints_text",
-        "created_at",
-        "patch",
-        "test_patch",
-        "version",
-        "FAIL_TO_PASS",
-        "PASS_TO_PASS",
-        "environment_setup_commit",
-    ]
-
-    data_table = {key: list() for key in columns}
-    for instance in input_instances.values():
-        datum = extract_fields(instance)
-        for key in columns:
-            data_table[key].append(datum[key] if key in datum else "")
-    dataset = Dataset.from_dict(data_table)
-
-    return dataset
-
-
-def download_instances(
-    input_data,
-    path=Path("SWE-bench_testsample"),
-    verbose=False,
-):
-    """Downloads code from github.
-
-    Args:
-    - input_data: dictionary with unprocessed input instances.
-    - verbose: set ContextManager verbose to True
-    """
-    input_instances = {x["instance_id"]: x for x in input_data}
-    input_instances_copy = deepcopy(input_instances)
-    input_instances_with_text = ingest_repos(input_instances_copy)
-    dataset = create_dataset(input_instances_with_text)
-    dataset.save_to_disk(path)
-    return dataset
-
-
 def download_github_repo(instance, output_dir):

@@ -154,4 +54,19 @@ def delete_repo(repo_path):
         else:
             print(f"Repository path {repo_path} does not exist. Nothing to delete.")
     except Exception as e:
         print(f"Error deleting repository at {repo_path}: {e}")
+
+
+def node_to_string(node):
+    text = node.attributes["text"]
+    type = node.attributes["type"]
+    return f"Node(id: {node.id}, type: {type}, description: {text})"
+
+
+def retrieved_edges_to_string(retrieved_edges):
+    edge_strings = []
+    for edge in retrieved_edges:
+        relationship_type = edge.attributes["relationship_type"]
+        edge_str = f"{node_to_string(edge.node1)} {relationship_type} {node_to_string(edge.node2)}"
+        edge_strings.append(edge_str)
+    return "\n".join(edge_strings)
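node_to_string and retrieved_edges_to_string flatten the retrieved subgraph into the plain text embedded in the prompt. A quick illustration with hypothetical stub objects (the real arguments are CogneeGraph nodes and edges, not these dataclasses):

from dataclasses import dataclass

from evals.eval_utils import retrieved_edges_to_string


@dataclass
class StubNode:
    id: int
    attributes: dict


@dataclass
class StubEdge:
    node1: StubNode
    node2: StubNode
    attributes: dict


parse_fn = StubNode(1, {"type": "function", "text": "def parse(tokens): ..."})
lexer_file = StubNode(2, {"type": "file", "text": "lexer.py"})
edge = StubEdge(lexer_file, parse_fn, {"relationship_type": "contains"})

print(retrieved_edges_to_string([edge]))
# Node(id: 2, type: file, description: lexer.py) contains Node(id: 1, type: function, description: def parse(tokens): ...)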