diff --git a/evals/eval_swe_bench.py b/evals/eval_swe_bench.py
index 1dd0e58ab..c16e821fa 100644
--- a/evals/eval_swe_bench.py
+++ b/evals/eval_swe_bench.py
@@ -8,26 +8,35 @@
 from swebench.harness.utils import load_swebench_dataset
 from swebench.inference.make_datasets.create_instance import PATCH_EXAMPLE
 import cognee
-
-from cognee.shared.data_models import SummarizedContent
-from cognee.shared.utils import render_graph
-from cognee.tasks.repo_processor import (
-    enrich_dependency_graph,
-    expand_dependency_graph,
-    get_repo_file_dependencies,
-)
-from cognee.tasks.storage import add_data_points
-from cognee.tasks.summarization import summarize_code
-from cognee.modules.pipelines import Task, run_tasks
 from cognee.api.v1.cognify.code_graph_pipeline import code_graph_pipeline
 from cognee.api.v1.search import SearchType
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.infrastructure.llm.prompts import read_query_prompt
-from evals.eval_utils import download_instances
-from evals.eval_utils import ingest_repos
-from evals.eval_utils import download_github_repo
-from evals.eval_utils import delete_repo
+from cognee.modules.pipelines import Task, run_tasks
+from cognee.modules.retrieval.brute_force_triplet_search import \
+    brute_force_triplet_search
+from cognee.shared.data_models import SummarizedContent
+from cognee.shared.utils import render_graph
+from cognee.tasks.repo_processor import (enrich_dependency_graph,
+                                         expand_dependency_graph,
+                                         get_repo_file_dependencies)
+from cognee.tasks.storage import add_data_points
+from cognee.tasks.summarization import summarize_code
+from evals.eval_utils import (delete_repo, download_github_repo,
+                              download_instances, ingest_repos)
+
+
+def node_to_string(node):
+    text = node.attributes["text"]
+    return f"Node({node.id}, {text})"
+
+
+def retrieved_edges_to_string(retrieved_edges):
+    edge_strings = []
+    for edge in retrieved_edges:
+        relationship_type = edge.attributes["relationship_type"]
+        edge_str = f"{node_to_string(edge.node1)} {relationship_type} {node_to_string(edge.node2)}"
+        edge_strings.append(edge_str)
+    return "\n".join(edge_strings)
 
 async def generate_patch_with_cognee(instance):
     await cognee.prune.prune_data()
@@ -39,19 +48,18 @@ async def generate_patch_with_cognee(instance):
 
     # repo_path = download_github_repo(instance, '../RAW_GIT_REPOS')
 
-    repo_path = '/Users/borisarzentar/Projects/graphrag'
-
+    repo_path = '../minimal_repo'
     tasks = [
         Task(get_repo_file_dependencies),
         Task(add_data_points, task_config = { "batch_size": 50 }),
         Task(enrich_dependency_graph, task_config = { "batch_size": 50 }),
         Task(expand_dependency_graph, task_config = { "batch_size": 50 }),
         Task(add_data_points, task_config = { "batch_size": 50 }),
-        # Task(summarize_code, summarization_model = SummarizedContent),
+        Task(summarize_code, summarization_model = SummarizedContent),
     ]
 
     pipeline = run_tasks(tasks, repo_path, "cognify_code_pipeline")
-    
+
     async for result in pipeline:
         print(result)
 
@@ -62,29 +70,27 @@ async def generate_patch_with_cognee(instance):
     problem_statement = instance['problem_statement']
     instructions = read_query_prompt("patch_gen_instructions.txt")
 
-    graph_str = 'HERE WE SHOULD PASS THE TRIPLETS FROM GRAPHRAG'
+    retrieved_edges = await brute_force_triplet_search(problem_statement, top_k = 3)
+
+    retrieved_edges_str = retrieved_edges_to_string(retrieved_edges)
 
     prompt = "\n".join([
-        instructions,
         "",
         PATCH_EXAMPLE,
         "",
-        "This is the knowledge graph:",
-        graph_str
+        "These are the retrieved edges:",
+        retrieved_edges_str
     ])
 
-    return 0
-
-    '''
-    :TODO: We have to find out how do we do the generation
     llm_client = get_llm_client()
 
     answer_prediction = await llm_client.acreate_structured_output(
-        text_input=problem_statement,
-        system_prompt=prompt,
+        text_input=prompt,
+        system_prompt=instructions,
         response_model=str,
     )
 
     return answer_prediction
-    '''
+
 
 async def generate_patch_without_cognee(instance):
     problem_statement = instance['problem_statement']
@@ -111,12 +117,11 @@ async def get_preds(dataset, with_cognee=True):
     for instance in dataset:
         await pred_func(instance)
 
-    '''
     preds = [{"instance_id": instance["instance_id"],
               "model_patch": await pred_func(instance),
               "model_name_or_path": model_name} for instance in dataset]
-    '''
-    return 0
+
+    return preds
 
 
 async def main():
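For reviewers, a minimal, self-contained sketch of what the new serialization helpers feed into the prompt. The Node and Edge classes below are hypothetical stand-ins for the triplet objects returned by brute_force_triplet_search; the patch only assumes they expose .id and an .attributes dict carrying "text" (nodes) and "relationship_type" (edges), exactly as read above.

    # Reviewer sketch (not part of the patch): stand-in objects mirroring
    # only the fields the new helpers read.
    from dataclasses import dataclass, field


    @dataclass
    class Node:
        id: str
        attributes: dict = field(default_factory=dict)


    @dataclass
    class Edge:
        node1: Node
        node2: Node
        attributes: dict = field(default_factory=dict)


    def node_to_string(node):
        # Same logic as the helper added in the patch.
        text = node.attributes["text"]
        return f"Node({node.id}, {text})"


    def retrieved_edges_to_string(retrieved_edges):
        # Same logic as the helper added in the patch: one line per triplet.
        edge_strings = []
        for edge in retrieved_edges:
            relationship_type = edge.attributes["relationship_type"]
            edge_str = f"{node_to_string(edge.node1)} {relationship_type} {node_to_string(edge.node2)}"
            edge_strings.append(edge_str)
        return "\n".join(edge_strings)


    if __name__ == "__main__":
        parse = Node("repo.parser.parse", {"text": "def parse(source): ..."})
        evaluate = Node("repo.eval.evaluate", {"text": "def evaluate(tree): ..."})
        edges = [Edge(parse, evaluate, {"relationship_type": "calls"})]
        print(retrieved_edges_to_string(edges))
        # -> Node(repo.parser.parse, def parse(source): ...) calls Node(repo.eval.evaluate, def evaluate(tree): ...)

Since the search is called with top_k = 3, at most three such triplet lines are appended after "These are the retrieved edges:" in the user prompt, while the patch-generation instructions now travel in the system prompt.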