feat: implements code retriever
This commit is contained in:
parent
9604d95ba5
commit
6177d04b44
3 changed files with 48 additions and 12 deletions
|
|
@ -1,3 +1,6 @@
|
||||||
I need you to solve this issue by looking at the provided edges retrieved from a knowledge graph and
|
You are a senior software engineer. I need you to solve this issue by looking at the provided context and
|
||||||
generate a single patch file that I can apply directly to this repository using git apply.
|
generate a single patch file that I can apply directly to this repository using git apply.
|
||||||
Please respond with a single patch file in the following format.
|
Additionally, please make sure that you provide code only with correct syntax and
|
||||||
|
you apply the patch on the relevant files (together with their path that you can try to find out from the github issue). Don't change the names of existing
|
||||||
|
functions or classes, as they may be referenced from other code.
|
||||||
|
Please respond only with a single patch file in the following format without adding any additional context or string.
|
||||||
|
|
|
||||||
|
|
@ -8,20 +8,27 @@ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
|
||||||
from cognee.modules.users.methods import get_default_user
|
from cognee.modules.users.methods import get_default_user
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
from cognee.shared.utils import send_telemetry
|
from cognee.shared.utils import send_telemetry
|
||||||
|
from cognee.api.v1.search import SearchType
|
||||||
|
from cognee.api.v1.search.search_v2 import search
|
||||||
|
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
|
|
||||||
|
|
||||||
async def code_description_to_code_part_search(query: str, user: User = None, top_k=5) -> list:
|
async def code_description_to_code_part_search(
|
||||||
|
query: str, include_docs=False, user: User = None, top_k=5
|
||||||
|
) -> list:
|
||||||
if user is None:
|
if user is None:
|
||||||
user = await get_default_user()
|
user = await get_default_user()
|
||||||
|
|
||||||
if user is None:
|
if user is None:
|
||||||
raise PermissionError("No user found in the system. Please create a user.")
|
raise PermissionError("No user found in the system. Please create a user.")
|
||||||
|
|
||||||
retrieved_codeparts = await code_description_to_code_part(query, user, top_k)
|
retrieved_codeparts = await code_description_to_code_part(query, user, top_k, include_docs)
|
||||||
return retrieved_codeparts
|
return retrieved_codeparts
|
||||||
|
|
||||||
|
|
||||||
async def code_description_to_code_part(query: str, user: User, top_k: int) -> List[str]:
|
async def code_description_to_code_part(
|
||||||
|
query: str, user: User, top_k: int, include_docs: bool
|
||||||
|
) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Maps a code description query to relevant code parts using a CodeGraph pipeline.
|
Maps a code description query to relevant code parts using a CodeGraph pipeline.
|
||||||
|
|
||||||
|
|
@ -29,6 +36,7 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L
|
||||||
query (str): The search query describing the code parts.
|
query (str): The search query describing the code parts.
|
||||||
user (User): The user performing the search.
|
user (User): The user performing the search.
|
||||||
top_k (int): Number of codegraph descriptions to match ( num of corresponding codeparts will be higher)
|
top_k (int): Number of codegraph descriptions to match ( num of corresponding codeparts will be higher)
|
||||||
|
include_docs(bool): Boolean showing whether we have the docs in the graph or not
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Set[str]: A set of unique code parts matching the query.
|
Set[str]: A set of unique code parts matching the query.
|
||||||
|
|
@ -37,6 +45,7 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L
|
||||||
ValueError: If arguments are invalid.
|
ValueError: If arguments are invalid.
|
||||||
RuntimeError: If an unexpected error occurs during execution.
|
RuntimeError: If an unexpected error occurs during execution.
|
||||||
"""
|
"""
|
||||||
|
print(include_docs)
|
||||||
if not query or not isinstance(query, str):
|
if not query or not isinstance(query, str):
|
||||||
raise ValueError("The query must be a non-empty string.")
|
raise ValueError("The query must be a non-empty string.")
|
||||||
if top_k <= 0 or not isinstance(top_k, int):
|
if top_k <= 0 or not isinstance(top_k, int):
|
||||||
|
|
@ -55,6 +64,26 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if include_docs:
|
||||||
|
search_results = await search(SearchType.INSIGHTS, query_text=query)
|
||||||
|
|
||||||
|
concatenated_descriptions = " ".join(
|
||||||
|
obj["description"]
|
||||||
|
for tpl in search_results
|
||||||
|
for obj in tpl
|
||||||
|
if isinstance(obj, dict) and "description" in obj
|
||||||
|
)
|
||||||
|
|
||||||
|
llm_client = get_llm_client()
|
||||||
|
context_from_documents = await llm_client.acreate_structured_output(
|
||||||
|
text_input=f"The retrieved context from documents"
|
||||||
|
f" is {concatenated_descriptions}.",
|
||||||
|
system_prompt="You are a Senior Software Engineer, summarize the context from documents"
|
||||||
|
f" in a way that it is gonna be provided next to codeparts as context"
|
||||||
|
f" while trying to solve this github issue connected to the project: {query}]",
|
||||||
|
response_model=str,
|
||||||
|
)
|
||||||
|
|
||||||
code_summaries = await vector_engine.search(
|
code_summaries = await vector_engine.search(
|
||||||
"code_summary_text", query_text=query, limit=top_k
|
"code_summary_text", query_text=query, limit=top_k
|
||||||
)
|
)
|
||||||
|
|
@ -102,6 +131,9 @@ async def code_description_to_code_part(query: str, user: User, top_k: int) -> L
|
||||||
for code_piece in code_pieces_to_return:
|
for code_piece in code_pieces_to_return:
|
||||||
context = context + code_piece.get_attribute("source_code")
|
context = context + code_piece.get_attribute("source_code")
|
||||||
|
|
||||||
|
if include_docs:
|
||||||
|
context = context_from_documents + context
|
||||||
|
|
||||||
return context
|
return context
|
||||||
|
|
||||||
except Exception as exec_error:
|
except Exception as exec_error:
|
||||||
|
|
|
||||||
|
|
@ -14,8 +14,6 @@ from cognee.infrastructure.llm.prompts import read_query_prompt
|
||||||
from cognee.modules.retrieval.description_to_codepart_search import (
|
from cognee.modules.retrieval.description_to_codepart_search import (
|
||||||
code_description_to_code_part_search,
|
code_description_to_code_part_search,
|
||||||
)
|
)
|
||||||
from cognee.shared.utils import render_graph
|
|
||||||
from evals.eval_utils import download_github_repo, retrieved_edges_to_string
|
|
||||||
|
|
||||||
|
|
||||||
def check_install_package(package_name):
|
def check_install_package(package_name):
|
||||||
|
|
@ -36,14 +34,17 @@ def check_install_package(package_name):
|
||||||
|
|
||||||
async def generate_patch_with_cognee(instance):
|
async def generate_patch_with_cognee(instance):
|
||||||
"""repo_path = download_github_repo(instance, "../RAW_GIT_REPOS")"""
|
"""repo_path = download_github_repo(instance, "../RAW_GIT_REPOS")"""
|
||||||
|
include_docs = True
|
||||||
problem_statement = instance["problem_statement"]
|
problem_statement = instance["problem_statement"]
|
||||||
instructions = read_query_prompt("patch_gen_kg_instructions.txt")
|
instructions = read_query_prompt("patch_gen_kg_instructions.txt")
|
||||||
|
|
||||||
repo_path = "/Users/laszlohajdu/Documents/GitHub/test/"
|
repo_path = "/Users/laszlohajdu/Documents/GitHub/graph_rag/"
|
||||||
async for result in run_code_graph_pipeline(repo_path, include_docs=False):
|
async for result in run_code_graph_pipeline(repo_path, include_docs=include_docs):
|
||||||
print(result)
|
print(result)
|
||||||
|
|
||||||
retrieved_codeparts = await code_description_to_code_part_search(problem_statement)
|
retrieved_codeparts = await code_description_to_code_part_search(
|
||||||
|
problem_statement, include_docs=include_docs
|
||||||
|
)
|
||||||
|
|
||||||
prompt = "\n".join(
|
prompt = "\n".join(
|
||||||
[
|
[
|
||||||
|
|
@ -51,7 +52,7 @@ async def generate_patch_with_cognee(instance):
|
||||||
"<patch>",
|
"<patch>",
|
||||||
PATCH_EXAMPLE,
|
PATCH_EXAMPLE,
|
||||||
"</patch>",
|
"</patch>",
|
||||||
"These are the retrieved edges:",
|
"This is the additional context to solve the problem (description from documentation together with codeparts):",
|
||||||
retrieved_codeparts,
|
retrieved_codeparts,
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue