Merge branch 'feature/cog-971-preparing-swe-bench-run' of https://github.com/topoteretes/cognee into feature/cog-971-preparing-swe-bench-run

2025-01-10 13:52:47 +01:00 · 2025-01-10 13:52:47 +01:00 · c163e35afd
commit c163e35afd
parent 06e8d2268b 16155f084f
13 changed files with 80 additions and 15 deletions
--- a/.github/workflows/profiling.yaml
+++ b/.github/workflows/profiling.yaml
@@ -94,7 +94,7 @@ jobs:
 #        chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
 #        # Run Scalene
 #        poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py
-#
+
 #    # Compare profiling results
 #    - name: Compare profiling results
 #      run: |
--- a/cognee/api/v1/config/config.py
+++ b/cognee/api/v1/config/config.py
@@ -131,6 +131,18 @@ class config:
                    message=f"'{key}' is not a valid attribute of the config."
                )

+    @staticmethod
+    def set_graph_db_config(config_dict: dict) -> None:
+        """
+        Updates the graph db config with values from config_dict; raises AttributeError on unknown keys.
+        """
+        graph_db_config = get_graph_config()
+        for key, value in config_dict.items():
+            if not hasattr(graph_db_config, key):
+                # Builtin AttributeError takes no `message` keyword; pass the text positionally.
+                raise AttributeError(f"'{key}' is not a valid attribute of the config.")
+            object.__setattr__(graph_db_config, key, value)
+
    @staticmethod
    def set_vector_db_config(config_dict: dict):
        """
--- a/cognee/api/v1/search/search_v2.py
+++ b/cognee/api/v1/search/search_v2.py
@@ -15,6 +15,7 @@ from cognee.tasks.chunks import query_chunks
 from cognee.tasks.graph import query_graph_connections
 from cognee.tasks.summarization import query_summaries
 from cognee.tasks.completion import query_completion
+from cognee.tasks.completion import graph_query_completion


 class SearchType(Enum):
@@ -22,6 +23,7 @@ class SearchType(Enum):
    INSIGHTS = "INSIGHTS"
    CHUNKS = "CHUNKS"
    COMPLETION = "COMPLETION"
+    GRAPH_COMPLETION = "GRAPH_COMPLETION"


 async def search(
@@ -65,6 +67,7 @@ async def specific_search(query_type: SearchType, query: str, user) -> list:
        SearchType.INSIGHTS: query_graph_connections,
        SearchType.CHUNKS: query_chunks,
        SearchType.COMPLETION: query_completion,
+        SearchType.GRAPH_COMPLETION: graph_query_completion,
    }

    search_task = search_tasks.get(query_type)
--- a/cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt
+++ b/cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt
@@ -0,0 +1 @@
+Answer the question using the provided context. If the provided context is not connected to the question, just answer "The provided knowledge base does not contain the answer to the question". Be as brief as possible.
--- a/cognee/infrastructure/llm/prompts/graph_context_for_question.txt
+++ b/cognee/infrastructure/llm/prompts/graph_context_for_question.txt
@@ -0,0 +1,2 @@
+The question is: `{{ question }}`
+and here is the context provided with a set of relationships from a knowledge graph separated by \n---\n each represented as node1 -- relation -- node2 triplet: `{{ context }}`
--- a/cognee/tasks/completion/__init__.py
+++ b/cognee/tasks/completion/__init__.py
@@ -1 +1,2 @@
 from .query_completion import query_completion
+from .graph_query_completion import graph_query_completion
--- a/cognee/tasks/completion/graph_query_completion.py
+++ b/cognee/tasks/completion/graph_query_completion.py
@@ -0,0 +1,46 @@
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.tasks.completion.exceptions import NoRelevantDataFound
+from cognee.infrastructure.llm.get_llm_client import get_llm_client
+from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
+from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search
+
+
+def retrieved_edges_to_string(retrieved_edges: list) -> str:
+    """Format each edge as a "node1 -- relation -- node2" line, joined by "---" separator lines."""
+
+    def _describe(edge) -> str:
+        # An endpoint is labelled by its "text" attribute, or by "name" if "text" is missing/falsy.
+        ends = [n.attributes.get("text") or n.attributes.get("name") for n in (edge.node1, edge.node2)]
+        return f"{ends[0]} -- {edge.attributes['relationship_type']} -- {ends[1]}"
+
+    return "\n---\n".join(_describe(edge) for edge in retrieved_edges)
+
+
+async def graph_query_completion(query: str) -> list:
+    """
+    Answer a query with the LLM, using triplets retrieved for that query as context.
+
+    Parameters:
+    - query (str): The query string to compute.
+
+    Returns:
+    - list: Single-element list holding the computed answer.
+
+    Raises:
+    - NoRelevantDataFound: If the triplet search returns no results.
+    """
+    triplets = await brute_force_triplet_search(query, top_k=5)
+    if not len(triplets):
+        raise NoRelevantDataFound
+
+    # Fill the user-prompt template with the question and the stringified triplets.
+    prompt_values = {"question": query, "context": retrieved_edges_to_string(triplets)}
+    user_text = render_prompt("graph_context_for_question.txt", prompt_values)
+    instructions = read_query_prompt("answer_simple_question_restricted.txt")
+
+    answer = await get_llm_client().acreate_structured_output(
+        text_input=user_text,
+        system_prompt=instructions,
+        response_model=str,
+    )
+    return [answer]
--- a/cognee/tasks/repo_processor/expand_dependency_graph.py
+++ b/cognee/tasks/repo_processor/expand_dependency_graph.py
@@ -5,10 +5,10 @@ from uuid import NAMESPACE_OID, uuid5
 from cognee.infrastructure.engine import DataPoint
 from cognee.shared.CodeGraphEntities import CodeFile, CodePart
 from cognee.tasks.repo_processor.extract_code_parts import extract_code_parts
-
 import logging

-logger = logging.getLogger("task:repo_processor")
+logger = logging.getLogger(__name__)
+


 def _add_code_parts_nodes_and_edges(code_file: CodeFile, part_type, code_parts) -> None:
--- a/cognee/tasks/repo_processor/extract_code_parts.py
+++ b/cognee/tasks/repo_processor/extract_code_parts.py
@@ -1,9 +1,8 @@
 from typing import Dict, List
 import parso
-
 import logging

-logger = logging.getLogger("task:repo_processor")
+logger = logging.getLogger(__name__)


 def _extract_parts_from_module(module, parts_dict: Dict[str, List[str]]) -> Dict[str, List[str]]:
--- a/cognee/tasks/repo_processor/get_local_dependencies.py
+++ b/cognee/tasks/repo_processor/get_local_dependencies.py
@@ -9,10 +9,9 @@ import aiofiles
 import jedi
 import parso
 from parso.tree import BaseNode
-
 import logging

-logger = logging.getLogger("task:repo_processor")
+logger = logging.getLogger(__name__)


@contextmanager
--- a/cognee/tasks/repo_processor/get_source_code_chunks.py
+++ b/cognee/tasks/repo_processor/get_source_code_chunks.py
@@ -9,7 +9,7 @@ from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.engine import DataPoint
 from cognee.shared.CodeGraphEntities import CodeFile, CodePart, SourceCodeChunk

-logger = logging.getLogger("task:get_source_code_chunks")
+logger = logging.getLogger(__name__)


 def _count_tokens(tokenizer: tiktoken.Encoding, source_code: str) -> int:
--- a/cognee/tasks/repo_processor/top_down_repo_parse.py
+++ b/cognee/tasks/repo_processor/top_down_repo_parse.py
@@ -4,7 +4,9 @@ import jedi
 import parso
 from tqdm import tqdm

-from . import logger
+import logging
+
+logger = logging.getLogger(__name__)

 _NODE_TYPE_MAP = {
    "funcdef": "func_def",
--- a/examples/python/dynamic_steps_example.py
+++ b/examples/python/dynamic_steps_example.py
@@ -1,8 +1,8 @@
 import cognee
 import asyncio
 import logging
-from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search
-from cognee.modules.retrieval.brute_force_triplet_search import format_triplets
+
+from cognee.api.v1.search import SearchType
 from cognee.shared.utils import setup_logging

 job_1 = """
@@ -185,14 +185,14 @@ async def main(enable_steps):

    # Step 4: Query insights
    if enable_steps.get("retriever"):
-        results = await brute_force_triplet_search(
-            "Who has the most experience with graphic design?"
+        search_results = await cognee.search(
+            SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?"
        )
-        print(format_triplets(results))
+        print(search_results)


 if __name__ == "__main__":
-    setup_logging(logging.ERROR)
+    setup_logging(logging.INFO)

    rebuild_kg = True
    retrieve = True
				`@ -0,0 +1 @@`
				`Answer the question using the provided context. If the provided context is not connected to the question, just answer "The provided knowledge base does not contain the answer to the question". Be as brief as possible.`