Feature/cog 537 implement retrieval algorithm from research paper (#8)
commit be6eebfbb1
19 changed files with 547 additions and 75 deletions
@@ -2,7 +2,7 @@
 import logging
 import asyncio
 from textwrap import dedent
-from typing import Optional, Any, List, Dict
+from typing import Optional, Any, List, Dict, Union
 from contextlib import asynccontextmanager
 from uuid import UUID
 from neo4j import AsyncSession
@@ -432,3 +432,49 @@ class Neo4jAdapter(GraphDBInterface):
         ) for record in result]

         return (nodes, edges)
+
+    async def get_filtered_graph_data(self, attribute_filters):
+        """
+        Fetches nodes and relationships filtered by specified attribute values.
+
+        Args:
+            attribute_filters (list of dict): A list of dictionaries where keys are attributes and values are lists of values to filter on.
+                Example: [{"community": ["1", "2"]}]
+
+        Returns:
+            tuple: A tuple containing two lists: nodes and edges.
+        """
+        where_clauses = []
+        for attribute, values in attribute_filters[0].items():
+            values_str = ", ".join(f"'{value}'" if isinstance(value, str) else str(value) for value in values)
+            where_clauses.append(f"n.{attribute} IN [{values_str}]")
+
+        where_clause = " AND ".join(where_clauses)
+
+        query_nodes = f"""
+        MATCH (n)
+        WHERE {where_clause}
+        RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties
+        """
+        result_nodes = await self.query(query_nodes)
+
+        nodes = [(
+            record["id"],
+            record["properties"],
+        ) for record in result_nodes]
+
+        query_edges = f"""
+        MATCH (n)-[r]->(m)
+        WHERE {where_clause} AND {where_clause.replace('n.', 'm.')}
+        RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
+        """
+        result_edges = await self.query(query_edges)
+
+        edges = [(
+            record["source"],
+            record["target"],
+            record["type"],
+            record["properties"],
+        ) for record in result_edges]
+
+        return (nodes, edges)
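Note (illustration only, not part of the commit): a minimal sketch of how an attribute filter expands into the Cypher WHERE clause built by get_filtered_graph_data above. The filter values are made up; only attribute_filters[0] is consulted, exactly as in the method.

    # Hypothetical example of the WHERE-clause construction used above.
    attribute_filters = [{"community": ["1", "2"]}]

    where_clauses = []
    for attribute, values in attribute_filters[0].items():
        values_str = ", ".join(f"'{value}'" if isinstance(value, str) else str(value) for value in values)
        where_clauses.append(f"n.{attribute} IN [{values_str}]")

    where_clause = " AND ".join(where_clauses)
    print(where_clause)                      # n.community IN ['1', '2']
    print(where_clause.replace('n.', 'm.'))  # m.community IN ['1', '2'] -- reused for the edge query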
@@ -6,7 +6,7 @@ import json
 import asyncio
 import logging
 from re import A
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Union
 from uuid import UUID
 import aiofiles
 import aiofiles.os as aiofiles_os
@@ -301,3 +301,39 @@ class NetworkXAdapter(GraphDBInterface):
             logger.info("Graph deleted successfully.")
         except Exception as error:
             logger.error("Failed to delete graph: %s", error)
+
+    async def get_filtered_graph_data(self, attribute_filters: List[Dict[str, List[Union[str, int]]]]):
+        """
+        Fetches nodes and relationships filtered by specified attribute values.
+
+        Args:
+            attribute_filters (list of dict): A list of dictionaries where keys are attributes and values are lists of values to filter on.
+                Example: [{"community": ["1", "2"]}]
+
+        Returns:
+            tuple: A tuple containing two lists:
+                - Nodes: List of tuples (node_id, node_properties).
+                - Edges: List of tuples (source_id, target_id, relationship_type, edge_properties).
+        """
+        # Create filters for nodes based on the attribute filters
+        where_clauses = []
+        for attribute, values in attribute_filters[0].items():
+            where_clauses.append((attribute, values))
+
+        # Filter nodes
+        filtered_nodes = [
+            (node, data) for node, data in self.graph.nodes(data=True)
+            if all(data.get(attr) in values for attr, values in where_clauses)
+        ]
+
+        # Filter edges where both source and target nodes satisfy the filters
+        filtered_edges = [
+            (source, target, data.get('relationship_type', 'UNKNOWN'), data)
+            for source, target, data in self.graph.edges(data=True)
+            if (
+                all(self.graph.nodes[source].get(attr) in values for attr, values in where_clauses) and
+                all(self.graph.nodes[target].get(attr) in values for attr, values in where_clauses)
+            )
+        ]
+
+        return filtered_nodes, filtered_edges
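Note (illustration only, not part of the commit): the same node/edge filtering logic applied to a tiny in-memory networkx graph; node ids and community values below are invented.

    import networkx as nx

    graph = nx.DiGraph()
    graph.add_node("a", community="1")
    graph.add_node("b", community="2")
    graph.add_node("c", community="3")
    graph.add_edge("a", "b", relationship_type="works_with")
    graph.add_edge("b", "c", relationship_type="knows")

    attribute_filters = [{"community": ["1", "2"]}]
    where_clauses = list(attribute_filters[0].items())

    filtered_nodes = [
        (node, data) for node, data in graph.nodes(data=True)
        if all(data.get(attr) in values for attr, values in where_clauses)
    ]
    filtered_edges = [
        (source, target, data.get("relationship_type", "UNKNOWN"), data)
        for source, target, data in graph.edges(data=True)
        if all(graph.nodes[source].get(attr) in values for attr, values in where_clauses)
        and all(graph.nodes[target].get(attr) in values for attr, values in where_clauses)
    ]

    print(filtered_nodes)  # nodes "a" and "b" only
    print(filtered_edges)  # only the a -> b edge survives, since "c" is filtered out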
@@ -10,6 +10,7 @@ from cognee.infrastructure.files.storage import LocalStorage
 from cognee.modules.storage.utils import copy_model, get_own_properties
 from ..models.ScoredResult import ScoredResult
 from ..vector_db_interface import VectorDBInterface
+from ..utils import normalize_distances
 from ..embeddings.EmbeddingEngine import EmbeddingEngine

 class IndexSchema(DataPoint):
@@ -141,6 +142,33 @@ class LanceDBAdapter(VectorDBInterface):
             score = 0,
         ) for result in results.to_dict("index").values()]

+    async def get_distance_from_collection_elements(
+        self,
+        collection_name: str,
+        query_text: str = None,
+        query_vector: List[float] = None
+    ):
+        if query_text is None and query_vector is None:
+            raise ValueError("One of query_text or query_vector must be provided!")
+
+        if query_text and not query_vector:
+            query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
+
+        connection = await self.get_connection()
+        collection = await connection.open_table(collection_name)
+
+        results = await collection.vector_search(query_vector).to_pandas()
+
+        result_values = list(results.to_dict("index").values())
+
+        normalized_values = normalize_distances(result_values)
+
+        return [ScoredResult(
+            id=UUID(result["id"]),
+            payload=result["payload"],
+            score=normalized_values[value_index],
+        ) for value_index, result in enumerate(result_values)]
+
     async def search(
         self,
         collection_name: str,
@@ -148,6 +176,7 @@ class LanceDBAdapter(VectorDBInterface):
         query_vector: List[float] = None,
         limit: int = 5,
         with_vector: bool = False,
+        normalized: bool = True
     ):
         if query_text is None and query_vector is None:
             raise ValueError("One of query_text or query_vector must be provided!")
@@ -162,26 +191,7 @@ class LanceDBAdapter(VectorDBInterface):

         result_values = list(results.to_dict("index").values())

-        min_value = 100
-        max_value = 0
-
-        for result in result_values:
-            value = float(result["_distance"])
-            if value > max_value:
-                max_value = value
-            if value < min_value:
-                min_value = value
-
-        normalized_values = []
-        min_value = min(result["_distance"] for result in result_values)
-        max_value = max(result["_distance"] for result in result_values)
-
-        if max_value == min_value:
-            # Avoid division by zero: Assign all normalized values to 0 (or any constant value like 1)
-            normalized_values = [0 for _ in result_values]
-        else:
-            normalized_values = [(result["_distance"] - min_value) / (max_value - min_value) for result in
-                                 result_values]
+        normalized_values = normalize_distances(result_values)

         return [ScoredResult(
             id = UUID(result["id"]),
@@ -11,6 +11,7 @@ from cognee.infrastructure.engine import DataPoint
 from .serialize_data import serialize_data
 from ..models.ScoredResult import ScoredResult
 from ..vector_db_interface import VectorDBInterface
+from ..utils import normalize_distances
 from ..embeddings.EmbeddingEngine import EmbeddingEngine
 from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
 from ...relational.ModelBase import Base
@@ -22,6 +23,19 @@ class IndexSchema(DataPoint):
         "index_fields": ["text"]
     }

+def singleton(class_):
+    # Note: Using this singleton as a decorator to a class removes
+    # the option to use class methods for that class
+    instances = {}
+
+    def getinstance(*args, **kwargs):
+        if class_ not in instances:
+            instances[class_] = class_(*args, **kwargs)
+        return instances[class_]
+
+    return getinstance
+
+@singleton
 class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):

     def __init__(
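Note (illustration only, not part of the commit): a standalone sketch of how this decorator behaves, applied to a toy class; the Config name and arguments are invented.

    def singleton(class_):
        instances = {}

        def getinstance(*args, **kwargs):
            if class_ not in instances:
                instances[class_] = class_(*args, **kwargs)
            return instances[class_]

        return getinstance

    @singleton
    class Config:
        def __init__(self, name):
            self.name = name

    a = Config("first")
    b = Config("second")   # arguments ignored; the first instance is reused
    assert a is b and b.name == "first"
    # Trade-off noted in the comment above: after decoration, Config is the
    # getinstance function, so classmethods and isinstance checks on Config no longer work.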
@@ -162,6 +176,51 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
             ) for result in results
         ]

+    async def get_distance_from_collection_elements(
+        self,
+        collection_name: str,
+        query_text: str = None,
+        query_vector: List[float] = None,
+        with_vector: bool = False
+    )-> List[ScoredResult]:
+        if query_text is None and query_vector is None:
+            raise ValueError("One of query_text or query_vector must be provided!")
+
+        if query_text and not query_vector:
+            query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
+
+        # Get PGVectorDataPoint Table from database
+        PGVectorDataPoint = await self.get_table(collection_name)
+
+        # Use async session to connect to the database
+        async with self.get_async_session() as session:
+            # Find closest vectors to query_vector
+            closest_items = await session.execute(
+                select(
+                    PGVectorDataPoint,
+                    PGVectorDataPoint.c.vector.cosine_distance(query_vector).label(
+                        "similarity"
+                    ),
+                )
+                .order_by("similarity")
+            )
+
+        vector_list = []
+
+        # Extract distances and find min/max for normalization
+        for vector in closest_items:
+            # TODO: Add normalization of similarity score
+            vector_list.append(vector)
+
+        # Create and return ScoredResult objects
+        return [
+            ScoredResult(
+                id = UUID(str(row.id)),
+                payload = row.payload,
+                score = row.similarity
+            ) for row in vector_list
+        ]
+
     async def search(
         self,
         collection_name: str,
@@ -142,6 +142,41 @@ class QDrantAdapter(VectorDBInterface):
         await client.close()
         return results

+    async def get_distance_from_collection_elements(
+        self,
+        collection_name: str,
+        query_text: str = None,
+        query_vector: List[float] = None,
+        with_vector: bool = False
+    ) -> List[ScoredResult]:
+
+        if query_text is None and query_vector is None:
+            raise ValueError("One of query_text or query_vector must be provided!")
+
+        client = self.get_qdrant_client()
+
+        results = await client.search(
+            collection_name = collection_name,
+            query_vector = models.NamedVector(
+                name = "text",
+                vector = query_vector if query_vector is not None else (await self.embed_data([query_text]))[0],
+            ),
+            with_vectors = with_vector
+        )
+
+        await client.close()
+
+        return [
+            ScoredResult(
+                id = UUID(result.id),
+                payload = {
+                    **result.payload,
+                    "id": UUID(result.id),
+                },
+                score = 1 - result.score,
+            ) for result in results
+        ]
+
     async def search(
         self,
         collection_name: str,
cognee/infrastructure/databases/vector/utils.py (new file, +16)
@@ -0,0 +1,16 @@
+from typing import List
+
+
+def normalize_distances(result_values: List[dict]) -> List[float]:
+
+    min_value = min(result["_distance"] for result in result_values)
+    max_value = max(result["_distance"] for result in result_values)
+
+    if max_value == min_value:
+        # Avoid division by zero: Assign all normalized values to 0 (or any constant value like 1)
+        normalized_values = [0 for _ in result_values]
+    else:
+        normalized_values = [(result["_distance"] - min_value) / (max_value - min_value) for result in
+                             result_values]
+
+    return normalized_values
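Note (illustrative values, not part of the commit): a quick usage sketch of the new helper; each result row is expected to carry a raw "_distance" field, as the LanceDB results do.

    from cognee.infrastructure.databases.vector.utils import normalize_distances

    rows = [{"_distance": 0.2}, {"_distance": 0.5}, {"_distance": 0.8}]
    print(normalize_distances(rows))  # [0.0, 0.5, 1.0] -- min-max scaled; if all distances are equal, every score is 0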
@@ -153,6 +153,36 @@ class WeaviateAdapter(VectorDBInterface):

         return await future

+    async def get_distance_from_collection_elements(
+        self,
+        collection_name: str,
+        query_text: str = None,
+        query_vector: List[float] = None,
+        with_vector: bool = False
+    ) -> List[ScoredResult]:
+        import weaviate.classes as wvc
+
+        if query_text is None and query_vector is None:
+            raise ValueError("One of query_text or query_vector must be provided!")
+
+        if query_vector is None:
+            query_vector = (await self.embed_data([query_text]))[0]
+
+        search_result = self.get_collection(collection_name).query.hybrid(
+            query=None,
+            vector=query_vector,
+            include_vector=with_vector,
+            return_metadata=wvc.query.MetadataQuery(score=True),
+        )
+
+        return [
+            ScoredResult(
+                id=UUID(str(result.uuid)),
+                payload=result.properties,
+                score=1 - float(result.metadata.score)
+            ) for result in search_result.objects
+        ]
+
     async def search(
         self,
         collection_name: str,
@@ -1,9 +1,12 @@
-from typing import List, Dict, Union
+import numpy as np
+
+from typing import List, Dict, Union
 from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
 from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
 from cognee.infrastructure.databases.graph import get_graph_engine
+import heapq
+from graphistry import edges


 class CogneeGraph(CogneeAbstractGraph):
     """
@@ -39,26 +42,33 @@ class CogneeGraph(CogneeAbstractGraph):
     def get_node(self, node_id: str) -> Node:
         return self.nodes.get(node_id, None)

-    def get_edges(self, node_id: str) -> List[Edge]:
+    def get_edges_from_node(self, node_id: str) -> List[Edge]:
         node = self.get_node(node_id)
         if node:
             return node.skeleton_edges
         else:
             raise ValueError(f"Node with id {node_id} does not exist.")

+    def get_edges(self)-> List[Edge]:
+        return self.edges
+
     async def project_graph_from_db(self,
                                     adapter: Union[GraphDBInterface],
                                     node_properties_to_project: List[str],
                                     edge_properties_to_project: List[str],
                                     directed = True,
                                     node_dimension = 1,
-                                    edge_dimension = 1) -> None:
+                                    edge_dimension = 1,
+                                    memory_fragment_filter = []) -> None:

         if node_dimension < 1 or edge_dimension < 1:
             raise ValueError("Dimensions must be positive integers")

         try:
-            nodes_data, edges_data = await adapter.get_graph_data()
+            if len(memory_fragment_filter) == 0:
+                nodes_data, edges_data = await adapter.get_graph_data()
+            else:
+                nodes_data, edges_data = await adapter.get_filtered_graph_data(attribute_filters = memory_fragment_filter)

             if not nodes_data:
                 raise ValueError("No node data retrieved from the database.")
@@ -89,3 +99,81 @@ class CogneeGraph(CogneeAbstractGraph):
             print(f"Error projecting graph: {e}")
         except Exception as ex:
             print(f"Unexpected error: {ex}")
+
+    async def map_vector_distances_to_graph_nodes(self, node_distances) -> None:
+        for category, scored_results in node_distances.items():
+            for scored_result in scored_results:
+                node_id = str(scored_result.id)
+                score = scored_result.score
+                node =self.get_node(node_id)
+                if node:
+                    node.add_attribute("vector_distance", score)
+                else:
+                    print(f"Node with id {node_id} not found in the graph.")
+
+    async def map_vector_distances_to_graph_edges(self, vector_engine, query) -> None: # :TODO: When we calculate edge embeddings in vector db change this similarly to node mapping
+        try:
+            # Step 1: Generate the query embedding
+            query_vector = await vector_engine.embed_data([query])
+            query_vector = query_vector[0]
+            if query_vector is None or len(query_vector) == 0:
+                raise ValueError("Failed to generate query embedding.")
+
+            # Step 2: Collect all unique relationship types
+            unique_relationship_types = set()
+            for edge in self.edges:
+                relationship_type = edge.attributes.get('relationship_type')
+                if relationship_type:
+                    unique_relationship_types.add(relationship_type)
+
+            # Step 3: Embed all unique relationship types
+            unique_relationship_types = list(unique_relationship_types)
+            relationship_type_embeddings = await vector_engine.embed_data(unique_relationship_types)
+
+            # Step 4: Map relationship types to their embeddings and calculate distances
+            embedding_map = {}
+            for relationship_type, embedding in zip(unique_relationship_types, relationship_type_embeddings):
+                edge_vector = np.array(embedding)
+
+                # Calculate cosine similarity
+                similarity = np.dot(query_vector, edge_vector) / (
+                    np.linalg.norm(query_vector) * np.linalg.norm(edge_vector)
+                )
+                distance = 1 - similarity
+
+                # Round the distance to 4 decimal places and store it
+                embedding_map[relationship_type] = round(distance, 4)
+
+            # Step 4: Assign precomputed distances to edges
+            for edge in self.edges:
+                relationship_type = edge.attributes.get('relationship_type')
+                if not relationship_type or relationship_type not in embedding_map:
+                    print(f"Edge {edge} has an unknown or missing relationship type.")
+                    continue
+
+                # Assign the precomputed distance
+                edge.attributes["vector_distance"] = embedding_map[relationship_type]
+
+        except Exception as ex:
+            print(f"Error mapping vector distances to edges: {ex}")
+
+
+    async def calculate_top_triplet_importances(self, k: int) -> List:
+        min_heap = []
+        for i, edge in enumerate(self.edges):
+            source_node = self.get_node(edge.node1.id)
+            target_node = self.get_node(edge.node2.id)
+
+            source_distance = source_node.attributes.get("vector_distance", 1) if source_node else 1
+            target_distance = target_node.attributes.get("vector_distance", 1) if target_node else 1
+            edge_distance = edge.attributes.get("vector_distance", 1)
+
+            total_distance = source_distance + target_distance + edge_distance
+
+            heapq.heappush(min_heap, (-total_distance, i, edge))
+            if len(min_heap) > k:
+                heapq.heappop(min_heap)
+
+        return [edge for _, _, edge in sorted(min_heap)]
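Note (standalone illustration, not from the commit): the heap-based top-k selection used in calculate_top_triplet_importances keeps the k edges with the smallest total distance by pushing negated distances onto a min-heap and evicting the current worst (largest distance) whenever the heap grows past k. Edge names and distances below are invented.

    import heapq

    distances = [("e1", 2.7), ("e2", 0.4), ("e3", 1.1), ("e4", 0.9), ("e5", 3.0)]
    k = 3

    min_heap = []
    for i, (edge, total_distance) in enumerate(distances):
        heapq.heappush(min_heap, (-total_distance, i, edge))
        if len(min_heap) > k:
            heapq.heappop(min_heap)  # drops the entry with the largest distance

    print([edge for _, _, edge in sorted(min_heap)])  # ['e3', 'e4', 'e2'] -- the three closest, worst first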
@@ -1,5 +1,5 @@
 import numpy as np
-from typing import List, Dict, Optional, Any
+from typing import List, Dict, Optional, Any, Union

 class Node:
     """
@@ -21,6 +21,7 @@ class Node:
             raise ValueError("Dimension must be a positive integer")
         self.id = node_id
         self.attributes = attributes if attributes is not None else {}
+        self.attributes["vector_distance"] = float('inf')
         self.skeleton_neighbours = []
         self.skeleton_edges = []
         self.status = np.ones(dimension, dtype=int)
@@ -55,6 +56,12 @@ class Node:
             raise ValueError(f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}.")
         return self.status[dimension] == 1

+    def add_attribute(self, key: str, value: Any) -> None:
+        self.attributes[key] = value
+
+    def get_attribute(self, key: str) -> Union[str, int, float]:
+        return self.attributes[key]
+
     def __repr__(self) -> str:
         return f"Node({self.id}, attributes={self.attributes})"
@@ -87,6 +94,7 @@ class Edge:
         self.node1 = node1
         self.node2 = node2
         self.attributes = attributes if attributes is not None else {}
+        self.attributes["vector_distance"] = float('inf')
         self.directed = directed
         self.status = np.ones(dimension, dtype=int)
@@ -95,6 +103,12 @@ class Edge:
             raise ValueError(f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}.")
         return self.status[dimension] == 1

+    def add_attribute(self, key: str, value: Any) -> None:
+        self.attributes[key] = value
+
+    def get_attribute(self, key: str, value: Any) -> Union[str, int, float]:
+        return self.attributes[key]
+
     def __repr__(self) -> str:
         direction = "->" if self.directed else "--"
         return f"Edge({self.node1.id} {direction} {self.node2.id}, attributes={self.attributes})"
cognee/modules/retrieval/__init__.py (new file, +0)
cognee/modules/retrieval/brute_force_triplet_search.py (new file, +150)
@@ -0,0 +1,150 @@
+import asyncio
+import logging
+from typing import List
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_default_user
+from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.shared.utils import send_telemetry
+
+def format_triplets(edges):
+    print("\n\n\n")
+    def filter_attributes(obj, attributes):
+        """Helper function to filter out non-None properties, including nested dicts."""
+        result = {}
+        for attr in attributes:
+            value = getattr(obj, attr, None)
+            if value is not None:
+                # If the value is a dict, extract relevant keys from it
+                if isinstance(value, dict):
+                    nested_values = {k: v for k, v in value.items() if k in attributes and v is not None}
+                    result[attr] = nested_values
+                else:
+                    result[attr] = value
+        return result
+
+    triplets = []
+    for edge in edges:
+        node1 = edge.node1
+        node2 = edge.node2
+        edge_attributes = edge.attributes
+        node1_attributes = node1.attributes
+        node2_attributes = node2.attributes
+
+        # Filter only non-None properties
+        node1_info = {key: value for key, value in node1_attributes.items() if value is not None}
+        node2_info = {key: value for key, value in node2_attributes.items() if value is not None}
+        edge_info = {key: value for key, value in edge_attributes.items() if value is not None}
+
+        # Create the formatted triplet
+        triplet = (
+            f"Node1: {node1_info}\n"
+            f"Edge: {edge_info}\n"
+            f"Node2: {node2_info}\n\n\n"
+        )
+        triplets.append(triplet)
+
+    return "".join(triplets)
+
+
+async def brute_force_triplet_search(query: str, user: User = None, top_k = 5) -> list:
+    if user is None:
+        user = await get_default_user()
+
+    if user is None:
+        raise PermissionError("No user found in the system. Please create a user.")
+
+    retrieved_results = await brute_force_search(query, user, top_k)
+
+    return retrieved_results
+
+
+def delete_duplicated_vector_db_elements(collections, results): #:TODO: This is just for now to fix vector db duplicates
+    results_dict = {}
+    for collection, results in zip(collections, results):
+        seen_ids = set()
+        unique_results = []
+        for result in results:
+            if result.id not in seen_ids:
+                unique_results.append(result)
+                seen_ids.add(result.id)
+            else:
+                print(f"Duplicate found in collection '{collection}': {result.id}")
+        results_dict[collection] = unique_results
+
+    return results_dict
+
+
+async def brute_force_search(
+    query: str,
+    user: User,
+    top_k: int,
+    collections: List[str] = None
+) -> list:
+    """
+    Performs a brute force search to retrieve the top triplets from the graph.
+
+    Args:
+        query (str): The search query.
+        user (User): The user performing the search.
+        top_k (int): The number of top results to retrieve.
+        collections (Optional[List[str]]): List of collections to query. Defaults to predefined collections.
+
+    Returns:
+        list: The top triplet results.
+    """
+    if not query or not isinstance(query, str):
+        raise ValueError("The query must be a non-empty string.")
+    if top_k <= 0:
+        raise ValueError("top_k must be a positive integer.")
+
+    if collections is None:
+        collections = ["entity_name", "text_summary_text", "entity_type_name", "document_chunk_text"]
+
+    try:
+        vector_engine = get_vector_engine()
+        graph_engine = await get_graph_engine()
+    except Exception as e:
+        logging.error("Failed to initialize engines: %s", e)
+        raise RuntimeError("Initialization error") from e
+
+    send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
+
+    try:
+        results = await asyncio.gather(
+            *[vector_engine.get_distance_from_collection_elements(collection, query_text=query) for collection in collections]
+        )
+
+        ############################################# :TODO: Change when vector db does not contain duplicates
+        node_distances = delete_duplicated_vector_db_elements(collections, results)
+        # node_distances = {collection: result for collection, result in zip(collections, results)}
+        ##############################################
+
+        memory_fragment = CogneeGraph()
+
+        await memory_fragment.project_graph_from_db(graph_engine,
+                                                    node_properties_to_project=['id',
+                                                                                'description',
+                                                                                'name',
+                                                                                'type',
+                                                                                'text'],
+                                                    edge_properties_to_project=['relationship_name'])
+
+        await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
+
+        #:TODO: Change when vectordb contains edge embeddings
+        await memory_fragment.map_vector_distances_to_graph_edges(vector_engine, query)
+
+        results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
+
+        send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
+
+        #:TODO: Once we have Edge pydantic models we should retrieve the exact edge and node objects from graph db
+        return results
+
+    except Exception as e:
+        logging.error("Error during brute force search for user: %s, query: %s. Error: %s", user.id, query, e)
+        send_telemetry("cognee.brute_force_triplet_search EXECUTION FAILED", user.id)
+        raise RuntimeError("An error occurred during brute force search") from e
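Note (usage sketch, not part of the commit): calling the new retriever end to end; it assumes cognee.add() and cognee.cognify() have already populated the graph and vector stores, and the query string is just an example.

    import asyncio
    from cognee.modules.retrieval.brute_force_triplet_search import (
        brute_force_triplet_search,
        format_triplets,
    )

    async def demo():
        triplets = await brute_force_triplet_search("What is a quantum computer?", top_k=5)
        print(format_triplets(triplets))  # one Node1 / Edge / Node2 block per retrieved triplet

    asyncio.run(demo())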
@@ -4,6 +4,7 @@ import logging
 import pathlib
 import cognee
 from cognee.api.v1.search import SearchType
+from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search

 logging.basicConfig(level=logging.DEBUG)
@@ -61,6 +62,9 @@ async def main():

     assert len(history) == 6, "Search history is not correct."

+    results = await brute_force_triplet_search('What is a quantum computer?')
+    assert len(results) > 0
+
     await cognee.prune.prune_data()
     assert not os.path.isdir(data_directory_path), "Local data files are not deleted"
@@ -3,6 +3,7 @@ import logging
 import pathlib
 import cognee
 from cognee.api.v1.search import SearchType
+from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search

 logging.basicConfig(level=logging.DEBUG)
@@ -89,6 +90,9 @@ async def main():
     history = await cognee.get_search_history()
     assert len(history) == 6, "Search history is not correct."

+    results = await brute_force_triplet_search('What is a quantum computer?')
+    assert len(results) > 0
+
     await cognee.prune.prune_data()
     assert not os.path.isdir(data_directory_path), "Local data files are not deleted"
@@ -5,6 +5,7 @@ import logging
 import pathlib
 import cognee
 from cognee.api.v1.search import SearchType
+from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search

 logging.basicConfig(level=logging.DEBUG)
@@ -61,6 +62,9 @@ async def main():
     history = await cognee.get_search_history()
     assert len(history) == 6, "Search history is not correct."

+    results = await brute_force_triplet_search('What is a quantum computer?')
+    assert len(results) > 0
+
     await cognee.prune.prune_data()
     assert not os.path.isdir(data_directory_path), "Local data files are not deleted"
@@ -3,6 +3,7 @@ import logging
 import pathlib
 import cognee
 from cognee.api.v1.search import SearchType
+from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search

 logging.basicConfig(level=logging.DEBUG)
@@ -59,6 +60,9 @@ async def main():
     history = await cognee.get_search_history()
     assert len(history) == 6, "Search history is not correct."

+    results = await brute_force_triplet_search('What is a quantum computer?')
+    assert len(results) > 0
+
     await cognee.prune.prune_data()
     assert not os.path.isdir(data_directory_path), "Local data files are not deleted"
@@ -8,7 +8,7 @@ def test_node_initialization():
     """Test that a Node is initialized correctly."""
     node = Node("node1", {"attr1": "value1"}, dimension=2)
     assert node.id == "node1"
-    assert node.attributes == {"attr1": "value1"}
+    assert node.attributes == {"attr1": "value1", 'vector_distance': np.inf}
     assert len(node.status) == 2
     assert np.all(node.status == 1)
@@ -95,7 +95,7 @@ def test_edge_initialization():
     edge = Edge(node1, node2, {"weight": 10}, directed=False, dimension=2)
     assert edge.node1 == node1
     assert edge.node2 == node2
-    assert edge.attributes == {"weight": 10}
+    assert edge.attributes == {'vector_distance': np.inf,"weight": 10}
     assert edge.directed is False
     assert len(edge.status) == 2
     assert np.all(edge.status == 1)
@@ -77,11 +77,11 @@ def test_get_edges_success(setup_graph):
     graph.add_node(node2)
     edge = Edge(node1, node2)
     graph.add_edge(edge)
-    assert edge in graph.get_edges("node1")
+    assert edge in graph.get_edges_from_node("node1")


 def test_get_edges_nonexistent_node(setup_graph):
     """Test retrieving edges for a nonexistent node raises an exception."""
     graph = setup_graph
     with pytest.raises(ValueError, match="Node with id nonexistent does not exist."):
-        graph.get_edges("nonexistent")
+        graph.get_edges_from_node("nonexistent")
@@ -46,7 +46,7 @@ services:
       - 7687:7687
     environment:
       - NEO4J_AUTH=neo4j/pleaseletmein
-      - NEO4J_PLUGINS=["apoc"]
+      - NEO4J_PLUGINS=["apoc", "graph-data-science"]
     networks:
       - cognee-network
@@ -1,32 +1,7 @@
 import cognee
 import asyncio
 from cognee.api.v1.search import SearchType
-
-job_position = """0:Senior Data Scientist (Machine Learning)
-
-Company: TechNova Solutions
-Location: San Francisco, CA
-
-Job Description:
-
-TechNova Solutions is seeking a Senior Data Scientist specializing in Machine Learning to join our dynamic analytics team. The ideal candidate will have a strong background in developing and deploying machine learning models, working with large datasets, and translating complex data into actionable insights.
-
-Responsibilities:
-
-Develop and implement advanced machine learning algorithms and models.
-Analyze large, complex datasets to extract meaningful patterns and insights.
-Collaborate with cross-functional teams to integrate predictive models into products.
-Stay updated with the latest advancements in machine learning and data science.
-Mentor junior data scientists and provide technical guidance.
-Qualifications:
-
-Master’s or Ph.D. in Data Science, Computer Science, Statistics, or a related field.
-5+ years of experience in data science and machine learning.
-Proficient in Python, R, and SQL.
-Experience with deep learning frameworks (e.g., TensorFlow, PyTorch).
-Strong problem-solving skills and attention to detail.
-Candidate CVs
-"""
+from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search
+from cognee.modules.retrieval.brute_force_triplet_search import format_triplets

 job_1 = """
 CV 1: Relevant
@@ -195,7 +170,7 @@ async def main(enable_steps):

     # Step 2: Add text
     if enable_steps.get("add_text"):
-        text_list = [job_position, job_1, job_2, job_3, job_4, job_5]
+        text_list = [job_1, job_2, job_3, job_4, job_5]
         for text in text_list:
             await cognee.add(text)
             print(f"Added text: {text[:35]}...")
@@ -206,24 +181,21 @@ async def main(enable_steps):
         print("Knowledge graph created.")

-    # Step 4: Query insights
-    if enable_steps.get("search_insights"):
-        search_results = await cognee.search(
-            SearchType.INSIGHTS,
-            {'query': 'Which applicant has the most relevant experience in data science?'}
-        )
-        print("Search results:")
-        for result_text in search_results:
-            print(result_text)
+    if enable_steps.get("retriever"):
+        results = await brute_force_triplet_search('Who has the most experience with graphic design?')
+        print(format_triplets(results))

 if __name__ == '__main__':
     # Flags to enable/disable steps

+    rebuild_kg = True
+    retrieve = True
     steps_to_enable = {
-        "prune_data": True,
-        "prune_system": True,
-        "add_text": True,
-        "cognify": True,
-        "search_insights": True
+        "prune_data": rebuild_kg,
+        "prune_system": rebuild_kg,
+        "add_text": rebuild_kg,
+        "cognify": rebuild_kg,
+        "retriever": retrieve
     }

     asyncio.run(main(steps_to_enable))