Merge branch 'feature/cog-971-preparing-swe-bench-run' of https://github.com/topoteretes/cognee into feature/cog-971-preparing-swe-bench-run

This commit is contained in:
hajdul88 2025-01-10 13:52:47 +01:00
commit c163e35afd
13 changed files with 80 additions and 15 deletions

View file

@ -94,7 +94,7 @@ jobs:
# chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
# # Run pyinstrument
# poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py
#
# # Compare profiling results
# - name: Compare profiling results
# run: |

View file

@ -131,6 +131,18 @@ class config:
message=f"'{key}' is not a valid attribute of the config."
)
@staticmethod
def set_graph_db_config(config_dict: dict) -> None:
    """
    Updates the graph db config with values from config_dict.

    Parameters:
    - config_dict (dict): Maps attribute names of the object returned by
      get_graph_config() to the values that should be assigned to them.

    Raises:
    - AttributeError: If a key is not an existing attribute of the graph config.
      Keys that were iterated before the invalid one have already been applied.
    """
    graph_db_config = get_graph_config()
    for key, value in config_dict.items():
        if not hasattr(graph_db_config, key):
            # The built-in AttributeError does not accept a `message=` keyword
            # (constructing it that way raises TypeError and hides the real
            # problem), so the text is passed positionally.
            raise AttributeError(f"'{key}' is not a valid attribute of the config.")
        # object.__setattr__ skips any __setattr__ override defined on the
        # config class and writes the attribute directly.
        object.__setattr__(graph_db_config, key, value)
@staticmethod
def set_vector_db_config(config_dict: dict):
"""

View file

@ -15,6 +15,7 @@ from cognee.tasks.chunks import query_chunks
from cognee.tasks.graph import query_graph_connections
from cognee.tasks.summarization import query_summaries
from cognee.tasks.completion import query_completion
from cognee.tasks.completion import graph_query_completion
class SearchType(Enum):
@ -22,6 +23,7 @@ class SearchType(Enum):
INSIGHTS = "INSIGHTS"
CHUNKS = "CHUNKS"
COMPLETION = "COMPLETION"
GRAPH_COMPLETION = "GRAPH_COMPLETION"
async def search(
@ -65,6 +67,7 @@ async def specific_search(query_type: SearchType, query: str, user) -> list:
SearchType.INSIGHTS: query_graph_connections,
SearchType.CHUNKS: query_chunks,
SearchType.COMPLETION: query_completion,
SearchType.GRAPH_COMPLETION: graph_query_completion,
}
search_task = search_tasks.get(query_type)

View file

@ -0,0 +1 @@
Answer the question using the provided context. If the provided context is not connected to the question, just answer "The provided knowledge base does not contain the answer to the question". Be as brief as possible.

View file

@ -0,0 +1,2 @@
The question is: `{{ question }}`
and here is the context provided with a set of relationships from a knowledge graph separated by \n---\n each represented as node1 -- relation -- node2 triplet: `{{ context }}`

View file

@ -1 +1,2 @@
from .query_completion import query_completion
from .graph_query_completion import graph_query_completion

View file

@ -0,0 +1,46 @@
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.tasks.completion.exceptions import NoRelevantDataFound
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search
def retrieved_edges_to_string(retrieved_edges: list) -> str:
    """
    Render retrieved graph edges as text, one triplet per edge.

    Each edge is written as "<node1> -- <relationship_type> -- <node2>". A node is
    shown by its "text" attribute, falling back to its "name" attribute when "text"
    is missing or falsy. Consecutive triplets are separated by a line holding "---".

    Parameters:
    - retrieved_edges (list): Edges exposing `node1`, `node2` and `attributes`.

    Returns:
    - str: The joined triplets; an empty string when no edges are given.
    """

    def _label(node):
        # Prefer the node's text; use its name only when text is absent or empty.
        return node.attributes.get("text") or node.attributes.get("name")

    def _triplet_line(edge):
        source = _label(edge.node1)
        target = _label(edge.node2)
        relation = edge.attributes["relationship_type"]
        return f"{source} -- {relation} -- {target}"

    return "\n---\n".join(map(_triplet_line, retrieved_edges))
async def graph_query_completion(query: str) -> list:
    """
    Answer a query with the LLM, using triplets found for the query as context.

    Parameters:
    - query (str): The query string to compute.

    Returns:
    - list: Single-element list holding the answer produced by the LLM client.

    Raises:
    - NoRelevantDataFound: When the triplet search returns no results.
    """
    triplets = await brute_force_triplet_search(query, top_k=5)
    if not len(triplets):
        raise NoRelevantDataFound

    # Values substituted into the user prompt template.
    prompt_values = dict(
        question=query,
        context=retrieved_edges_to_string(triplets),
    )
    rendered_user_prompt = render_prompt("graph_context_for_question.txt", prompt_values)
    restricted_system_prompt = read_query_prompt("answer_simple_question_restricted.txt")

    answer = await get_llm_client().acreate_structured_output(
        text_input=rendered_user_prompt,
        system_prompt=restricted_system_prompt,
        response_model=str,
    )
    return [answer]

View file

@ -5,10 +5,10 @@ from uuid import NAMESPACE_OID, uuid5
from cognee.infrastructure.engine import DataPoint
from cognee.shared.CodeGraphEntities import CodeFile, CodePart
from cognee.tasks.repo_processor.extract_code_parts import extract_code_parts
import logging
logger = logging.getLogger("task:repo_processor")
logger = logging.getLogger(__name__)
def _add_code_parts_nodes_and_edges(code_file: CodeFile, part_type, code_parts) -> None:

View file

@ -1,9 +1,8 @@
from typing import Dict, List
import parso
import logging
logger = logging.getLogger("task:repo_processor")
logger = logging.getLogger(__name__)
def _extract_parts_from_module(module, parts_dict: Dict[str, List[str]]) -> Dict[str, List[str]]:

View file

@ -9,10 +9,9 @@ import aiofiles
import jedi
import parso
from parso.tree import BaseNode
import logging
logger = logging.getLogger("task:repo_processor")
logger = logging.getLogger(__name__)
@contextmanager

View file

@ -9,7 +9,7 @@ from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.infrastructure.engine import DataPoint
from cognee.shared.CodeGraphEntities import CodeFile, CodePart, SourceCodeChunk
logger = logging.getLogger("task:get_source_code_chunks")
logger = logging.getLogger(__name__)
def _count_tokens(tokenizer: tiktoken.Encoding, source_code: str) -> int:

View file

@ -4,7 +4,9 @@ import jedi
import parso
from tqdm import tqdm
from . import logger
import logging
logger = logging.getLogger(__name__)
_NODE_TYPE_MAP = {
"funcdef": "func_def",

View file

@ -1,8 +1,8 @@
import cognee
import asyncio
import logging
from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search
from cognee.modules.retrieval.brute_force_triplet_search import format_triplets
from cognee.api.v1.search import SearchType
from cognee.shared.utils import setup_logging
job_1 = """
@ -185,14 +185,14 @@ async def main(enable_steps):
# Step 4: Query insights
if enable_steps.get("retriever"):
results = await brute_force_triplet_search(
"Who has the most experience with graphic design?"
search_results = await cognee.search(
SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?"
)
print(format_triplets(results))
print(search_results)
if __name__ == "__main__":
setup_logging(logging.ERROR)
setup_logging(logging.INFO)
rebuild_kg = True
retrieve = True