Merge remote-tracking branch 'origin/dev' into feature/cog-2078-cognee-ui-refactor

2025-05-25 13:11:21 +02:00 · 2025-05-25 13:11:21 +02:00 · ef929ba442
commit ef929ba442
parent 342cbc9461 4650c9c638
42 changed files with 592 additions and 238 deletions
--- a/.data/code/example.txt
+++ b/.data/code/example.txt
@ -1,28 +0,0 @@
 '''
 	Given a string, find the length of the longest substring without repeating characters.
 	Examples:
 	Given "abcabcbb", the answer is "abc", which the length is 3.
 	Given "bbbbb", the answer is "b", with the length of 1.
 	Given "pwwkew", the answer is "wke", with the length of 3. Note that the answer must be a substring, "pwke" is a subsequence and not a substring.
 '''
 class Solution(object):
    def lengthOfLongestSubstring(self, s):
        """
        :type s: str
        :rtype: int
        """
        mapSet = {}
        start, result = 0, 0
        for end in range(len(s)):
        	if s[end] in mapSet:
        		start = max(mapSet[s[end]], start)
        	result = max(result, end-start+1)
        	mapSet[s[end]] = end+1
        return result
--- a/.data/short_stories/soldiers-home.pdf
+++ b/.data/short_stories/soldiers-home.pdf
--- a/README.md
+++ b/README.md
@ -35,11 +35,11 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
  <p align="center">
  🌐 Available Languages
  :
-  <a href="community/README.pt.md">🇵🇹 Português</a>
+  <a href="assets/community/README.pt.md">🇵🇹 Português</a>
  ·
-  <a href="community/README.zh.md">🇨🇳 [中文]</a>
+  <a href="assets/community/README.zh.md">🇨🇳 [中文]</a>
  ·
-  <a href="community/README.ru.md">🇷🇺 Русский</a>
+  <a href="assets/community/README.ru.md">🇷🇺 Русский</a>
  </p>
--- a/assets/community/README.pt.md
+++ b/assets/community/README.pt.md
--- a/assets/community/README.ru.md
+++ b/assets/community/README.ru.md
--- a/assets/community/README.zh.md
+++ b/assets/community/README.zh.md
--- a/assets/community/cognee_benefits_zh.JPG
+++ b/assets/community/cognee_benefits_zh.JPG
--- a/assets/community/cognee_diagram_zh.JPG
+++ b/assets/community/cognee_diagram_zh.JPG
--- a/assets/community/graph_visualization_pt.png
+++ b/assets/community/graph_visualization_pt.png
--- a/assets/community/graph_visualization_ru.png
+++ b/assets/community/graph_visualization_ru.png
--- a/cognee-mcp/pyproject.toml
+++ b/cognee-mcp/pyproject.toml
@ -8,7 +8,7 @@ requires-python = ">=3.10"
 dependencies = [
    # For local cognee repo usage remove comment bellow and add absolute path to cognee
    #"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
-    "cognee[postgres,codegraph,gemini,huggingface,docs]==0.1.40",
+    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.1.40",
    "fastmcp>=1.0",
    "mcp==1.5.0",
    "uv>=0.6.3",
--- a/cognee/eval_framework/answer_generation/answer_generation_executor.py
+++ b/cognee/eval_framework/answer_generation/answer_generation_executor.py
@ -1,5 +1,9 @@
-from typing import List, Dict
+from typing import List, Dict, Any
 from cognee.modules.retrieval.completion_retriever import CompletionRetriever
 from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
    GraphCompletionContextExtensionRetriever,
 )
 from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 from cognee.modules.retrieval.graph_summary_completion_retriever import (
    GraphSummaryCompletionRetriever,
@ -8,8 +12,10 @@ from cognee.modules.retrieval.graph_summary_completion_retriever import (
 from cognee.modules.retrieval.base_retriever import BaseRetriever
-retriever_options: Dict[str, BaseRetriever] = {
+retriever_options: Dict[str, Any] = {
    "cognee_graph_completion": GraphCompletionRetriever,
    "cognee_graph_completion_cot": GraphCompletionCotRetriever,
    "cognee_graph_completion_context_extension": GraphCompletionContextExtensionRetriever,
    "cognee_completion": CompletionRetriever,
    "graph_summary_completion": GraphSummaryCompletionRetriever,
 }
--- a/cognee/eval_framework/eval_config.py
+++ b/cognee/eval_framework/eval_config.py
@ -14,9 +14,7 @@ class EvalConfig(BaseSettings):
    # Question answering params
    answering_questions: bool = True
-    qa_engine: str = (
+    qa_engine: str = "cognee_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'
        "cognee_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion'
    )
    # Evaluation params
    evaluating_answers: bool = True
--- a/cognee/eval_framework/modal_eval_dashboard.py
+++ b/cognee/eval_framework/modal_eval_dashboard.py
@ -45,6 +45,8 @@ def run():
 # Streamlit Dashboard Application Logic
 # ----------------------------------------------------------------------------
 def main():
    metrics_volume.reload()
    st.set_page_config(page_title="Metrics Dashboard", layout="wide")
    st.title("📊 Cognee Evaluations Dashboard")
--- a/cognee/infrastructure/llm/prompts/cot_followup_system_prompt.txt
+++ b/cognee/infrastructure/llm/prompts/cot_followup_system_prompt.txt
@ -0,0 +1,3 @@
 You are a helpful assistant whose job is to ask exactly one clarifying follow-up question,
 to collect the missing piece of information needed to fully answer the user’s original query.
 Respond with the question only (no extra text, no punctuation beyond what’s needed).
--- a/cognee/infrastructure/llm/prompts/cot_followup_user_prompt.txt
+++ b/cognee/infrastructure/llm/prompts/cot_followup_user_prompt.txt
@ -0,0 +1,14 @@
 Based on the following, ask exactly one question that would directly resolve the gap identified in the validation reasoning and allow a valid answer.
 Think in a way that with the followup question you are exploring a knowledge graph which contains entities, entity types and document chunks
 <QUERY>
 `{{ query}}`
 </QUERY>
 <ANSWER>
 `{{ answer }}`
 </ANSWER>
 <REASONING>
 `{{ reasoning }}`
 </REASONING>
--- a/cognee/infrastructure/llm/prompts/cot_validation_system_prompt.txt
+++ b/cognee/infrastructure/llm/prompts/cot_validation_system_prompt.txt
@ -0,0 +1,2 @@
 You are a helpful agent who are allowed to use only the provided question answer and context.
 I want to you find reasoning what is missing from the context or why the answer is not answering the question or not correct strictly based on the context.
--- a/cognee/infrastructure/llm/prompts/cot_validation_user_prompt.txt
+++ b/cognee/infrastructure/llm/prompts/cot_validation_user_prompt.txt
@ -0,0 +1,11 @@
 <QUESTION>
 `{{ query}}`
 </QUESTION>
 <ANSWER>
 `{{ answer }}`
 </ANSWER>
 <CONTEXT>
 `{{ context }}`
 </CONTEXT>
--- a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py
+++ b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py
@ -0,0 +1,74 @@
 from typing import Any, Optional, List
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 from cognee.modules.retrieval.utils.completion import generate_completion
 from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
 logger = get_logger()
 class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
    def __init__(
        self,
        user_prompt_path: str = "graph_context_for_question.txt",
        system_prompt_path: str = "answer_simple_question.txt",
        top_k: Optional[int] = 5,
    ):
        super().__init__(
            user_prompt_path=user_prompt_path,
            system_prompt_path=system_prompt_path,
            top_k=top_k,
        )
    async def get_completion(
        self, query: str, context: Optional[Any] = None, context_extension_rounds=4
    ) -> List[str]:
        triplets = []
        if context is None:
            triplets += await self.get_triplets(query)
            context = await self.resolve_edges_to_text(triplets)
        round_idx = 1
        while round_idx <= context_extension_rounds:
            prev_size = len(triplets)
            logger.info(
                f"Context extension: round {round_idx} - generating next graph locational query."
            )
            completion = await generate_completion(
                query=query,
                context=context,
                user_prompt_path=self.user_prompt_path,
                system_prompt_path=self.system_prompt_path,
            )
            triplets += await self.get_triplets(completion)
            triplets = list(set(triplets))
            context = await self.resolve_edges_to_text(triplets)
            num_triplets = len(triplets)
            if num_triplets == prev_size:
                logger.info(
                    f"Context extension: round {round_idx} – no new triplets found; stopping early."
                )
                break
            logger.info(
                f"Context extension: round {round_idx} - "
                f"number of unique retrieved triplets: {num_triplets}"
            )
            round_idx += 1
        answer = await generate_completion(
            query=query,
            context=context,
            user_prompt_path=self.user_prompt_path,
            system_prompt_path=self.system_prompt_path,
        )
        return [answer]
--- a/cognee/modules/retrieval/graph_completion_cot_retriever.py
+++ b/cognee/modules/retrieval/graph_completion_cot_retriever.py
@ -0,0 +1,84 @@
 from typing import Any, Optional, List
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 from cognee.modules.retrieval.utils.completion import generate_completion
 from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
 logger = get_logger()
 class GraphCompletionCotRetriever(GraphCompletionRetriever):
    def __init__(
        self,
        user_prompt_path: str = "graph_context_for_question.txt",
        system_prompt_path: str = "answer_simple_question.txt",
        validation_user_prompt_path: str = "cot_validation_user_prompt.txt",
        validation_system_prompt_path: str = "cot_validation_system_prompt.txt",
        followup_system_prompt_path: str = "cot_followup_system_prompt.txt",
        followup_user_prompt_path: str = "cot_followup_user_prompt.txt",
        top_k: Optional[int] = 5,
    ):
        super().__init__(
            user_prompt_path=user_prompt_path,
            system_prompt_path=system_prompt_path,
            top_k=top_k,
        )
        self.validation_system_prompt_path = validation_system_prompt_path
        self.validation_user_prompt_path = validation_user_prompt_path
        self.followup_system_prompt_path = followup_system_prompt_path
        self.followup_user_prompt_path = followup_user_prompt_path
    async def get_completion(
        self, query: str, context: Optional[Any] = None, max_iter=4
    ) -> List[str]:
        llm_client = get_llm_client()
        followup_question = ""
        triplets = []
        answer = [""]
        for round_idx in range(max_iter + 1):
            if round_idx == 0:
                if context is None:
                    context = await self.get_context(query)
            else:
                triplets += await self.get_triplets(followup_question)
                context = await self.resolve_edges_to_text(list(set(triplets)))
            answer = await generate_completion(
                query=query,
                context=context,
                user_prompt_path=self.user_prompt_path,
                system_prompt_path=self.system_prompt_path,
            )
            logger.info(f"Chain-of-thought: round {round_idx} - answer: {answer}")
            if round_idx < max_iter:
                valid_args = {"query": query, "answer": answer, "context": context}
                valid_user_prompt = render_prompt(
                    filename=self.validation_user_prompt_path, context=valid_args
                )
                valid_system_prompt = read_query_prompt(
                    prompt_file_name=self.validation_system_prompt_path
                )
                reasoning = await llm_client.acreate_structured_output(
                    text_input=valid_user_prompt,
                    system_prompt=valid_system_prompt,
                    response_model=str,
                )
                followup_args = {"query": query, "answer": answer, "reasoning": reasoning}
                followup_prompt = render_prompt(
                    filename=self.followup_user_prompt_path, context=followup_args
                )
                followup_system = read_query_prompt(
                    prompt_file_name=self.followup_system_prompt_path
                )
                followup_question = await llm_client.acreate_structured_output(
                    text_input=followup_prompt, system_prompt=followup_system, response_model=str
                )
                logger.info(
                    f"Chain-of-thought: round {round_idx} - follow-up question: {followup_question}"
                )
        return [answer]
--- a/cognee/modules/search/methods/search.py
+++ b/cognee/modules/search/methods/search.py
@ -11,6 +11,10 @@ from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionR
 from cognee.modules.retrieval.graph_summary_completion_retriever import (
    GraphSummaryCompletionRetriever,
 )
 from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
 from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
    GraphCompletionContextExtensionRetriever,
 )
 from cognee.modules.retrieval.code_retriever import CodeRetriever
 from cognee.modules.retrieval.cypher_search_retriever import CypherSearchRetriever
 from cognee.modules.retrieval.natural_language_retriever import NaturalLanguageRetriever
@ -19,7 +23,7 @@ from cognee.modules.storage.utils import JSONEncoder
 from cognee.modules.users.models import User
 from cognee.modules.users.permissions.methods import get_document_ids_for_user
 from cognee.shared.utils import send_telemetry
-from ..operations import log_query, log_result
+from cognee.modules.search.operations import log_query, log_result
 async def search(
@ -70,6 +74,14 @@ async def specific_search(
            system_prompt_path=system_prompt_path,
            top_k=top_k,
        ).get_completion,
        SearchType.GRAPH_COMPLETION_COT: GraphCompletionCotRetriever(
            system_prompt_path=system_prompt_path,
            top_k=top_k,
        ).get_completion,
        SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION: GraphCompletionContextExtensionRetriever(
            system_prompt_path=system_prompt_path,
            top_k=top_k,
        ).get_completion,
        SearchType.GRAPH_SUMMARY_COMPLETION: GraphSummaryCompletionRetriever(
            system_prompt_path=system_prompt_path, top_k=top_k
        ).get_completion,
--- a/cognee/modules/search/types/SearchType.py
+++ b/cognee/modules/search/types/SearchType.py
@ -11,3 +11,5 @@ class SearchType(Enum):
    CODE = "CODE"
    CYPHER = "CYPHER"
    NATURAL_LANGUAGE = "NATURAL_LANGUAGE"
    GRAPH_COMPLETION_COT = "GRAPH_COMPLETION_COT"
    GRAPH_COMPLETION_CONTEXT_EXTENSION = "GRAPH_COMPLETION_CONTEXT_EXTENSION"
--- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py
+++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py
@ -0,0 +1,185 @@
 import os
 import pytest
 import pathlib
 from typing import Optional, Union
 import cognee
 from cognee.low_level import setup, DataPoint
 from cognee.tasks.storage import add_data_points
 from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
 from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
    GraphCompletionContextExtensionRetriever,
 )
 class TestGraphCompletionRetriever:
    @pytest.mark.asyncio
    async def test_graph_completion_extension_context_simple(self):
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".cognee_system/test_graph_context"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".data_storage/test_graph_context"
        )
        cognee.config.data_root_directory(data_directory_path)
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        await setup()
        class Company(DataPoint):
            name: str
        class Person(DataPoint):
            name: str
            works_for: Company
        company1 = Company(name="Figma")
        company2 = Company(name="Canva")
        person1 = Person(name="Steve Rodger", works_for=company1)
        person2 = Person(name="Ike Loma", works_for=company1)
        person3 = Person(name="Jason Statham", works_for=company1)
        person4 = Person(name="Mike Broski", works_for=company2)
        person5 = Person(name="Christina Mayer", works_for=company2)
        entities = [company1, company2, person1, person2, person3, person4, person5]
        await add_data_points(entities)
        retriever = GraphCompletionContextExtensionRetriever()
        context = await retriever.get_context("Who works at Canva?")
        assert "Mike Broski --[works_for]--> Canva" in context, "Failed to get Mike Broski"
        assert "Christina Mayer --[works_for]--> Canva" in context, "Failed to get Christina Mayer"
        answer = await retriever.get_completion("Who works at Canva?")
        assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
        assert all(isinstance(item, str) and item.strip() for item in answer), (
            "Answer must contain only non-empty strings"
        )
    @pytest.mark.asyncio
    async def test_graph_completion_extension_context_complex(self):
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".cognee_system/test_graph_completion_context"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".data_storage/test_graph_completion_context"
        )
        cognee.config.data_root_directory(data_directory_path)
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        await setup()
        class Company(DataPoint):
            name: str
            metadata: dict = {"index_fields": ["name"]}
        class Car(DataPoint):
            brand: str
            model: str
            year: int
        class Location(DataPoint):
            country: str
            city: str
        class Home(DataPoint):
            location: Location
            rooms: int
            sqm: int
        class Person(DataPoint):
            name: str
            works_for: Company
            owns: Optional[list[Union[Car, Home]]] = None
        company1 = Company(name="Figma")
        company2 = Company(name="Canva")
        person1 = Person(name="Mike Rodger", works_for=company1)
        person1.owns = [Car(brand="Toyota", model="Camry", year=2020)]
        person2 = Person(name="Ike Loma", works_for=company1)
        person2.owns = [
            Car(brand="Tesla", model="Model S", year=2021),
            Home(location=Location(country="USA", city="New York"), sqm=80, rooms=4),
        ]
        person3 = Person(name="Jason Statham", works_for=company1)
        person4 = Person(name="Mike Broski", works_for=company2)
        person4.owns = [Car(brand="Ford", model="Mustang", year=1978)]
        person5 = Person(name="Christina Mayer", works_for=company2)
        person5.owns = [Car(brand="Honda", model="Civic", year=2023)]
        entities = [company1, company2, person1, person2, person3, person4, person5]
        await add_data_points(entities)
        retriever = GraphCompletionContextExtensionRetriever(top_k=20)
        context = await retriever.get_context("Who works at Figma?")
        print(context)
        assert "Mike Rodger --[works_for]--> Figma" in context, "Failed to get Mike Rodger"
        assert "Ike Loma --[works_for]--> Figma" in context, "Failed to get Ike Loma"
        assert "Jason Statham --[works_for]--> Figma" in context, "Failed to get Jason Statham"
        answer = await retriever.get_completion("Who works at Figma?")
        assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
        assert all(isinstance(item, str) and item.strip() for item in answer), (
            "Answer must contain only non-empty strings"
        )
    @pytest.mark.asyncio
    async def test_get_graph_completion_extension_context_on_empty_graph(self):
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".cognee_system/test_graph_completion_context"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".data_storage/test_graph_completion_context"
        )
        cognee.config.data_root_directory(data_directory_path)
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        retriever = GraphCompletionContextExtensionRetriever()
        with pytest.raises(DatabaseNotCreatedError):
            await retriever.get_context("Who works at Figma?")
        await setup()
        context = await retriever.get_context("Who works at Figma?")
        assert context == "", "Context should be empty on an empty graph"
        answer = await retriever.get_completion("Who works at Figma?")
        assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
        assert all(isinstance(item, str) and item.strip() for item in answer), (
            "Answer must contain only non-empty strings"
        )
 if __name__ == "__main__":
    from asyncio import run
    test = TestGraphCompletionRetriever()
    async def main():
        await test.test_graph_completion_context_simple()
        await test.test_graph_completion_context_complex()
        await test.test_get_graph_completion_context_on_empty_graph()
    run(main())
--- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py
+++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py
@ -0,0 +1,183 @@
 import os
 import pytest
 import pathlib
 from typing import Optional, Union
 import cognee
 from cognee.low_level import setup, DataPoint
 from cognee.tasks.storage import add_data_points
 from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
 from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
 class TestGraphCompletionRetriever:
    @pytest.mark.asyncio
    async def test_graph_completion_cot_context_simple(self):
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".cognee_system/test_graph_context"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".data_storage/test_graph_context"
        )
        cognee.config.data_root_directory(data_directory_path)
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        await setup()
        class Company(DataPoint):
            name: str
        class Person(DataPoint):
            name: str
            works_for: Company
        company1 = Company(name="Figma")
        company2 = Company(name="Canva")
        person1 = Person(name="Steve Rodger", works_for=company1)
        person2 = Person(name="Ike Loma", works_for=company1)
        person3 = Person(name="Jason Statham", works_for=company1)
        person4 = Person(name="Mike Broski", works_for=company2)
        person5 = Person(name="Christina Mayer", works_for=company2)
        entities = [company1, company2, person1, person2, person3, person4, person5]
        await add_data_points(entities)
        retriever = GraphCompletionCotRetriever()
        context = await retriever.get_context("Who works at Canva?")
        assert "Mike Broski --[works_for]--> Canva" in context, "Failed to get Mike Broski"
        assert "Christina Mayer --[works_for]--> Canva" in context, "Failed to get Christina Mayer"
        answer = await retriever.get_completion("Who works at Canva?")
        assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
        assert all(isinstance(item, str) and item.strip() for item in answer), (
            "Answer must contain only non-empty strings"
        )
    @pytest.mark.asyncio
    async def test_graph_completion_cot_context_complex(self):
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".cognee_system/test_graph_completion_context"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".data_storage/test_graph_completion_context"
        )
        cognee.config.data_root_directory(data_directory_path)
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        await setup()
        class Company(DataPoint):
            name: str
            metadata: dict = {"index_fields": ["name"]}
        class Car(DataPoint):
            brand: str
            model: str
            year: int
        class Location(DataPoint):
            country: str
            city: str
        class Home(DataPoint):
            location: Location
            rooms: int
            sqm: int
        class Person(DataPoint):
            name: str
            works_for: Company
            owns: Optional[list[Union[Car, Home]]] = None
        company1 = Company(name="Figma")
        company2 = Company(name="Canva")
        person1 = Person(name="Mike Rodger", works_for=company1)
        person1.owns = [Car(brand="Toyota", model="Camry", year=2020)]
        person2 = Person(name="Ike Loma", works_for=company1)
        person2.owns = [
            Car(brand="Tesla", model="Model S", year=2021),
            Home(location=Location(country="USA", city="New York"), sqm=80, rooms=4),
        ]
        person3 = Person(name="Jason Statham", works_for=company1)
        person4 = Person(name="Mike Broski", works_for=company2)
        person4.owns = [Car(brand="Ford", model="Mustang", year=1978)]
        person5 = Person(name="Christina Mayer", works_for=company2)
        person5.owns = [Car(brand="Honda", model="Civic", year=2023)]
        entities = [company1, company2, person1, person2, person3, person4, person5]
        await add_data_points(entities)
        retriever = GraphCompletionCotRetriever(top_k=20)
        context = await retriever.get_context("Who works at Figma?")
        print(context)
        assert "Mike Rodger --[works_for]--> Figma" in context, "Failed to get Mike Rodger"
        assert "Ike Loma --[works_for]--> Figma" in context, "Failed to get Ike Loma"
        assert "Jason Statham --[works_for]--> Figma" in context, "Failed to get Jason Statham"
        answer = await retriever.get_completion("Who works at Figma?")
        assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
        assert all(isinstance(item, str) and item.strip() for item in answer), (
            "Answer must contain only non-empty strings"
        )
    @pytest.mark.asyncio
    async def test_get_graph_completion_cot_context_on_empty_graph(self):
        system_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".cognee_system/test_graph_completion_context"
        )
        cognee.config.system_root_directory(system_directory_path)
        data_directory_path = os.path.join(
            pathlib.Path(__file__).parent, ".data_storage/test_graph_completion_context"
        )
        cognee.config.data_root_directory(data_directory_path)
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        retriever = GraphCompletionCotRetriever()
        with pytest.raises(DatabaseNotCreatedError):
            await retriever.get_context("Who works at Figma?")
        await setup()
        context = await retriever.get_context("Who works at Figma?")
        assert context == "", "Context should be empty on an empty graph"
        answer = await retriever.get_completion("Who works at Figma?")
        assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
        assert all(isinstance(item, str) and item.strip() for item in answer), (
            "Answer must contain only non-empty strings"
        )
 if __name__ == "__main__":
    from asyncio import run
    test = TestGraphCompletionRetriever()
    async def main():
        await test.test_graph_completion_context_simple()
        await test.test_graph_completion_context_complex()
        await test.test_get_graph_completion_context_on_empty_graph()
    run(main())
--- a/deployment/helm/Chart.yaml
+++ b/deployment/helm/Chart.yaml
--- a/deployment/helm/Dockerfile
+++ b/deployment/helm/Dockerfile
--- a/deployment/helm/README.md
+++ b/deployment/helm/README.md
--- a/deployment/helm/docker-compose-helm.yml
+++ b/deployment/helm/docker-compose-helm.yml
--- a/deployment/helm/templates/cognee_deployment.yaml
+++ b/deployment/helm/templates/cognee_deployment.yaml
--- a/deployment/helm/templates/cognee_service.yaml
+++ b/deployment/helm/templates/cognee_service.yaml
--- a/deployment/helm/templates/postgres_deployment.yaml
+++ b/deployment/helm/templates/postgres_deployment.yaml
--- a/deployment/helm/templates/postgres_pvc.yaml
+++ b/deployment/helm/templates/postgres_pvc.yaml
--- a/deployment/helm/templates/postgres_service.yaml
+++ b/deployment/helm/templates/postgres_service.yaml
--- a/deployment/helm/values.yaml
+++ b/deployment/helm/values.yaml
--- a/examples/data/multimedia/example.png
+++ b/examples/data/multimedia/example.png
--- a/examples/data/multimedia/text_to_speech.mp3
+++ b/examples/data/multimedia/text_to_speech.mp3
--- a/examples/python/multimedia_example.py
+++ b/examples/python/multimedia_example.py
@ -21,11 +21,11 @@ async def main():
    # and description of these files
    mp3_file_path = os.path.join(
        pathlib.Path(__file__).parent.parent.parent,
-        ".data/multimedia/text_to_speech.mp3",
+        "examples/data/multimedia/text_to_speech.mp3",
    )
    png_file_path = os.path.join(
        pathlib.Path(__file__).parent.parent.parent,
-        ".data/multimedia/example.png",
+        "examples/data/multimedia/example.png",
    )
    # Add the files, and make it available for cognify
--- a/notebooks/cognee_multimedia_demo.ipynb
+++ b/notebooks/cognee_multimedia_demo.ipynb
@ -21,10 +21,10 @@
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "import os\n",
    "import pathlib\n",
@ -34,12 +34,12 @@
    "mp3_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
-    "    \".data/multimedia/text_to_speech.mp3\",\n",
+    "    \"examples/data/multimedia/text_to_speech.mp3\",\n",
    ")\n",
    "png_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
-    "    \".data/multimedia/example.png\",\n",
+    "    \"examples/data/multimedia/example.png\",\n",
    ")"
   ]
  },
--- a/profiling/graph_pydantic_conversion/benchmark_function.py
+++ b/profiling/graph_pydantic_conversion/benchmark_function.py
@ -1,62 +0,0 @@
 import statistics
 import time
 import tracemalloc
 from typing import Any, Callable, Dict
 import psutil
 def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]:
    """
    Benchmark a function for memory usage and computational performance.
    Args:
        func: Function to benchmark
        *args: Arguments to pass to the function
        num_runs: Number of times to run the benchmark
    Returns:
        Dictionary containing benchmark metrics
    """
    execution_times = []
    peak_memory_usages = []
    cpu_percentages = []
    process = psutil.Process()
    for _ in range(num_runs):
        # Start memory tracking
        tracemalloc.start()
        # Measure execution time and CPU usage
        start_time = time.perf_counter()
        start_cpu_time = process.cpu_times()
        end_cpu_time = process.cpu_times()
        end_time = time.perf_counter()
        # Calculate metrics
        execution_time = end_time - start_time
        cpu_time = (end_cpu_time.user + end_cpu_time.system) - (
            start_cpu_time.user + start_cpu_time.system
        )
        current, peak = tracemalloc.get_traced_memory()
        # Store results
        execution_times.append(execution_time)
        peak_memory_usages.append(peak / 1024 / 1024)  # Convert to MB
        cpu_percentages.append((cpu_time / execution_time) * 100)
        tracemalloc.stop()
    analysis = {
        "mean_execution_time": statistics.mean(execution_times),
        "mean_peak_memory_mb": statistics.mean(peak_memory_usages),
        "mean_cpu_percent": statistics.mean(cpu_percentages),
        "num_runs": num_runs,
    }
    if num_runs > 1:
        analysis["std_execution_time"] = statistics.stdev(execution_times)
    return analysis
--- a/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
+++ b/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
@ -1,63 +0,0 @@
 import argparse
 import asyncio
 from .benchmark_function import benchmark_function
 from cognee.modules.graph.utils import get_graph_from_model
 from cognee.tests.unit.interfaces.graph.util import (
    PERSON_NAMES,
    create_organization_recursive,
 )
 # Example usage:
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Benchmark graph model with configurable recursive depth"
    )
    parser.add_argument(
        "--recursive-depth",
        type=int,
        default=3,
        help="Recursive depth for graph generation (default: 3)",
    )
    parser.add_argument("--runs", type=int, default=5, help="Number of benchmark runs (default: 5)")
    args = parser.parse_args()
    society = create_organization_recursive(
        "society", "Society", PERSON_NAMES, args.recursive_depth
    )
    added_nodes = {}
    added_edges = {}
    visited_properties = {}
    nodes, edges = asyncio.run(
        get_graph_from_model(
            society,
            added_nodes=added_nodes,
            added_edges=added_edges,
            visited_properties=visited_properties,
        )
    )
    def get_graph_from_model_sync(model):
        added_nodes = {}
        added_edges = {}
        visited_properties = {}
        return asyncio.run(
            get_graph_from_model(
                model,
                added_nodes=added_nodes,
                added_edges=added_edges,
                visited_properties=visited_properties,
            )
        )
    results = benchmark_function(get_graph_from_model_sync, society, num_runs=args.runs)
    print("\nBenchmark Results:")
    print(f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {args.recursive_depth}")
    print(f"Mean Peak Memory: {results['mean_peak_memory_mb']:.2f} MB")
    print(f"Mean CPU Usage: {results['mean_cpu_percent']:.2f}%")
    print(f"Mean Execution Time: {results['mean_execution_time']:.4f} seconds")
    if "std_execution_time" in results:
        print(f"Execution Time Std: {results['std_execution_time']:.4f} seconds")
--- a/profiling/util/DummyEmbeddingEngine.py
+++ b/profiling/util/DummyEmbeddingEngine.py
@ -1,10 +0,0 @@
 import numpy as np
 from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
 class DummyEmbeddingEngine(EmbeddingEngine):
    async def embed_text(self, text: list[str]) -> list[list[float]]:
        return list(list(np.random.randn(3072)))
    def get_vector_size(self) -> int:
        return 3072
--- a/profiling/util/DummyLLMAdapter.py
+++ b/profiling/util/DummyLLMAdapter.py
@ -1,59 +0,0 @@
 from typing import Type
 from uuid import uuid4
 import spacy
 import textacy
 from pydantic import BaseModel
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.shared.data_models import Edge, KnowledgeGraph, Node, SummarizedContent
 class DummyLLMAdapter(LLMInterface):
    nlp = spacy.load("en_core_web_sm")
    async def acreate_structured_output(
        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
    ) -> BaseModel:
        if str(response_model) == "<class 'cognee.shared.data_models.SummarizedContent'>":
            return dummy_summarize_content(text_input)
        elif str(response_model) == "<class 'cognee.shared.data_models.KnowledgeGraph'>":
            return dummy_extract_knowledge_graph(text_input, self.nlp)
        else:
            raise Exception(
                "Currently dummy acreate_structured_input is only implemented for SummarizedContent and KnowledgeGraph"
            )
 def dummy_extract_knowledge_graph(text, nlp):
    doc = nlp(text)
    triples = list(textacy.extract.subject_verb_object_triples(doc))
    nodes = {}
    edges = []
    for triple in triples:
        source = "_".join([str(e) for e in triple.subject])
        target = "_".join([str(e) for e in triple.object])
        nodes[source] = nodes.get(
            source, Node(id=str(uuid4()), name=source, type="object", description="")
        )
        nodes[target] = nodes.get(
            target, Node(id=str(uuid4()), name=target, type="object", description="")
        )
        edge_type = "_".join([str(e) for e in triple.verb])
        edges.append(
            Edge(
                source_node_id=nodes[source].id,
                target_node_id=nodes[target].id,
                relationship_name=edge_type,
            )
        )
    return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)
 def dummy_summarize_content(text):
    words = [(word, len(word)) for word in set(text.split(" "))]
    words = sorted(words, key=lambda x: x[1], reverse=True)
    summary = " ".join([word for word, _ in words[:50]])
    description = " ".join([word for word, _ in words[:10]])
    return SummarizedContent(summary=summary, description=description)
		`@ -0,0 +1,2 @@`
							`You are a helpful agent who are allowed to use only the provided question answer and context.`
							`I want to you find reasoning what is missing from the context or why the answer is not answering the question or not correct strictly based on the context.`