diff --git a/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt b/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt index ebbb03f75..3117ac9f1 100644 --- a/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt +++ b/cognee/infrastructure/llm/prompts/patch_gen_kg_instructions.txt @@ -1,3 +1,6 @@ -I need you to solve this issue by looking at the provided edges retrieved from a knowledge graph and -generate a single patch file that I can apply directly to this repository using git apply. -Please respond with a single patch file in the following format. \ No newline at end of file +You are a senior software engineer. I need you to solve this issue by looking at the provided context and +generate a single patch file that I can apply directly to this repository using git apply. +Additionally, please make sure that the code you provide is syntactically correct and +that the patch is applied to the relevant files (use their full paths, which you can try to infer from the GitHub issue). Don't change the names of existing +functions or classes, as they may be referenced from other code. +Please respond only with a single patch file in the following format, without adding any additional context or text. 
diff --git a/cognee/modules/retrieval/description_to_codepart_search.py b/cognee/modules/retrieval/description_to_codepart_search.py index fec17fb16..538f76a6e 100644 --- a/cognee/modules/retrieval/description_to_codepart_search.py +++ b/cognee/modules/retrieval/description_to_codepart_search.py @@ -8,20 +8,27 @@ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph from cognee.modules.users.methods import get_default_user from cognee.modules.users.models import User from cognee.shared.utils import send_telemetry +from cognee.api.v1.search import SearchType +from cognee.api.v1.search.search_v2 import search +from cognee.infrastructure.llm.get_llm_client import get_llm_client -async def code_description_to_code_part_search(query: str, user: User = None, top_k=5) -> list: +async def code_description_to_code_part_search( + query: str, include_docs=False, user: User = None, top_k=5 +) -> list: if user is None: user = await get_default_user() if user is None: raise PermissionError("No user found in the system. Please create a user.") - retrieved_codeparts = await code_description_to_code_part(query, user, top_k) + retrieved_codeparts = await code_description_to_code_part(query, user, top_k, include_docs) return retrieved_codeparts -async def code_description_to_code_part(query: str, user: User, top_k: int) -> List[str]: +async def code_description_to_code_part( + query: str, user: User, top_k: int, include_docs: bool +) -> List[str]: """ Maps a code description query to relevant code parts using a CodeGraph pipeline. @@ -29,6 +36,7 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L query (str): The search query describing the code parts. user (User): The user performing the search. 
top_k (int): Number of codegraph descriptions to match ( num of corresponding codeparts will be higher) + include_docs(bool): Boolean showing whether we have the docs in the graph or not Returns: Set[str]: A set of unique code parts matching the query. @@ -37,6 +45,7 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L ValueError: If arguments are invalid. RuntimeError: If an unexpected error occurs during execution. """ + print(include_docs) if not query or not isinstance(query, str): raise ValueError("The query must be a non-empty string.") if top_k <= 0 or not isinstance(top_k, int): @@ -55,6 +64,26 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L ) try: + if include_docs: + search_results = await search(SearchType.INSIGHTS, query_text=query) + + concatenated_descriptions = " ".join( + obj["description"] + for tpl in search_results + for obj in tpl + if isinstance(obj, dict) and "description" in obj + ) + + llm_client = get_llm_client() + context_from_documents = await llm_client.acreate_structured_output( + text_input=f"The retrieved context from documents" + f" is {concatenated_descriptions}.", + system_prompt="You are a Senior Software Engineer, summarize the context from documents" + f" in a way that it is gonna be provided next to codeparts as context" + f" while trying to solve this github issue connected to the project: {query}]", + response_model=str, + ) + code_summaries = await vector_engine.search( "code_summary_text", query_text=query, limit=top_k ) @@ -102,6 +131,9 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L for code_piece in code_pieces_to_return: context = context + code_piece.get_attribute("source_code") + if include_docs: + context = context_from_documents + context + return context except Exception as exec_error: diff --git a/evals/eval_swe_bench.py b/evals/eval_swe_bench.py index b5fcc616b..894acf1bb 100644 --- a/evals/eval_swe_bench.py 
+++ b/evals/eval_swe_bench.py @@ -14,8 +14,6 @@ from cognee.infrastructure.llm.prompts import read_query_prompt from cognee.modules.retrieval.description_to_codepart_search import ( code_description_to_code_part_search, ) -from cognee.shared.utils import render_graph -from evals.eval_utils import download_github_repo, retrieved_edges_to_string def check_install_package(package_name): @@ -36,14 +34,17 @@ def check_install_package(package_name): async def generate_patch_with_cognee(instance): """repo_path = download_github_repo(instance, "../RAW_GIT_REPOS")""" + include_docs = True problem_statement = instance["problem_statement"] instructions = read_query_prompt("patch_gen_kg_instructions.txt") - repo_path = "/Users/laszlohajdu/Documents/GitHub/test/" - async for result in run_code_graph_pipeline(repo_path, include_docs=False): + repo_path = "/Users/laszlohajdu/Documents/GitHub/graph_rag/" + async for result in run_code_graph_pipeline(repo_path, include_docs=include_docs): print(result) - retrieved_codeparts = await code_description_to_code_part_search(problem_statement) + retrieved_codeparts = await code_description_to_code_part_search( + problem_statement, include_docs=include_docs + ) prompt = "\n".join( [ @@ -51,7 +52,7 @@ async def generate_patch_with_cognee(instance): "", PATCH_EXAMPLE, "", - "These are the retrieved edges:", + "This is the additional context to solve the problem (description from documentation together with codeparts):", retrieved_codeparts, ] )