Add utils for graph visualization + classification nodes

2024-03-11 12:41:32 +01:00 · 2024-03-11 12:41:32 +01:00 · 7e964bcb23
commit 7e964bcb23
parent faf7e6ae59
12 changed files with 1487 additions and 1610 deletions
--- a/Demo_graph.ipynb
+++ b/Demo_graph.ipynb
--- a/cognitive_architecture/api/init.py
+++ b/cognitive_architecture/api/init.py
--- a/cognitive_architecture/api/v1/init.py
+++ b/cognitive_architecture/api/v1/init.py
--- a/cognitive_architecture/api/v1/cognify/cognify.py
+++ b/cognitive_architecture/api/v1/cognify/cognify.py
@ -14,8 +14,13 @@ from dotenv import load_dotenv
 import os
 from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import CollectionConfig
 from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
 from cognitive_architecture.modules.cognify.graph.add_classification_nodes import add_classification_nodes
 from cognitive_architecture.modules.cognify.graph.add_node_connections import add_node_connection, graph_ready_output, \
    connect_nodes_in_graph
 from cognitive_architecture.modules.cognify.graph.add_propositions import append_to_graph
 from cognitive_architecture.modules.cognify.llm.add_node_connection_embeddings import process_items
 from cognitive_architecture.modules.cognify.vector.batch_search import adapted_qdrant_batch_search
 from cognitive_architecture.modules.cognify.vector.load_propositions import add_propositions
 # Load environment variables from .env file
@ -146,7 +151,7 @@ async def cognify(input_text:str):
    await add_classification_nodes(graph_client, 'Document:doc1', transformed_dict_1)
-    await append_to_graph(layer_1_graph, required_layers_one, graph_client)
+    F, unique_layer_uuids = await append_to_graph(layer_1_graph, required_layers_one, graph_client)
    def extract_node_descriptions(data):
        descriptions = []
@ -158,10 +163,10 @@ async def cognify(input_text:str):
        return descriptions
    # Extract the node descriptions
-    graph = await graph_client.graph()
+    graph = graph_client.graph
    node_descriptions = extract_node_descriptions(graph.nodes(data=True))
-    unique_layer_uuids = set(node['layer_decomposition_uuid'] for node in node_descriptions)
+    # unique_layer_uuids = set(node['layer_decomposition_uuid'] for node in node_descriptions)
-
+    #
    db = get_vector_database()
@ -178,10 +183,43 @@ async def cognify(input_text:str):
    for layer in unique_layer_uuids:
        await db.create_collection(layer,collection_config)
-    #to check if it works
+    # #to check if it works
-
+    #
    await add_propositions(node_descriptions, db)
    from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import AsyncQdrantClient
    grouped_data = await add_node_connection(graph_client, db, node_descriptions)
    llm_client = get_llm_client()
    relationship_dict = await process_items(grouped_data, unique_layer_uuids, llm_client)
    results = await adapted_qdrant_batch_search(relationship_dict, db)
    relationship_d = graph_ready_output(results)
    CONNECTED_GRAPH = connect_nodes_in_graph(F, relationship_d)
    return CONNECTED_GRAPH
    #
    # grouped_data = {}
    #
    # # Iterate through each dictionary in the list
    # for item in node_descriptions:
    #     # Get the layer_decomposition_uuid of the current dictionary
    #     uuid = item['layer_decomposition_uuid']
    #
    #     # Check if this uuid is already a key in the grouped_data dictionary
    #     if uuid not in grouped_data:
    #         # If not, initialize a new list for this uuid
    #         grouped_data[uuid] = []
    #
    #     # Append the current dictionary to the list corresponding to its uuid
    #     grouped_data[uuid].append(item)
--- a/cognitive_architecture/infrastructure/databases/vector/qdrant/adapter.py
+++ b/cognitive_architecture/infrastructure/databases/vector/qdrant/adapter.py
@ -52,7 +52,7 @@ class QDrantAdapter(VectorDBInterface):
            quantization_config = collection_config.quantization_config
        )
-    async def create_data_points(self, collection_name: str, data_points: List[any]):
+    async def create_data_points(self, collection_name: str, data_points):
        client = self.get_qdrant_client()
        return await client.upload_points(
@ -96,11 +96,11 @@ class QDrantAdapter(VectorDBInterface):
                # vector= embedding,
                limit=3,
                with_vector=False
-            ) for embedding in embeddings
+            ) for embedding in [embeddings]
        ]
        # Perform batch search with the dynamically generated requests
-        results = client.search_batch(
+        results = await client.search_batch(
            collection_name=collection_name,
            requests=requests
        )
--- a/cognitive_architecture/infrastructure/databases/vector/vector_db_interface.py
+++ b/cognitive_architecture/infrastructure/databases/vector/vector_db_interface.py
@ -43,7 +43,7 @@ class VectorDBInterface(Protocol):
    async def create_data_points(
        self,
        collection_name: str,
-        data_points: List[any]
+        data_points
    ): raise NotImplementedError
    # @abstractmethod
--- a/cognitive_architecture/infrastructure/databases/vector/weaviate/adapter.py
+++ b/cognitive_architecture/infrastructure/databases/vector/weaviate/adapter.py
--- a/cognitive_architecture/modules/cognify/graph/add_node_connections.py
+++ b/cognitive_architecture/modules/cognify/graph/add_node_connections.py
@ -0,0 +1,113 @@
 from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
 from cognitive_architecture.shared.data_models import GraphDBType
 def extract_node_descriptions(data):
    """
    Collect a description record for every node that carries both an id and a description.

    :param data: Iterable of ``(node_key, attributes)`` pairs, e.g. ``graph.nodes(data=True)``
    :return: List of dicts with the keys ``node_id``, ``description``, ``layer_uuid`` and
        ``layer_decomposition_uuid``, in iteration order
    :raises KeyError: If a node has ``id`` and ``description`` but lacks ``layer_uuid`` or
        ``layer_decomposition_uuid``
    """
    return [
        {
            'node_id': attrs['id'],
            'description': attrs['description'],
            'layer_uuid': attrs['layer_uuid'],
            'layer_decomposition_uuid': attrs['layer_decomposition_uuid'],
        }
        # The node key itself is not needed; only the attribute dict is inspected
        for _, attrs in data
        if 'description' in attrs and 'id' in attrs
    ]
 def add_node_connection(graph_client, vector_database_client, data):
    """
    Group the graph's node descriptions by their layer decomposition UUID.

    :param graph_client: Client exposing the graph as its ``graph`` attribute
    :param vector_database_client: Not used by this function
    :param data: Not used by this function; descriptions are always re-read from the graph
    :return: Dict mapping each ``layer_decomposition_uuid`` to the list of description
        dicts (as produced by ``extract_node_descriptions``) that belong to it

    NOTE(review): this is a plain synchronous function, while the cognify pipeline
    calls it with ``await`` -- confirm which side should change.
    """
    grouped_data = {}
    for description in extract_node_descriptions(graph_client.graph.nodes(data=True)):
        # setdefault creates the bucket the first time a decomposition uuid is seen
        grouped_data.setdefault(description['layer_decomposition_uuid'], []).append(description)
    return grouped_data
 def connect_nodes_in_graph(graph, relationship_dict):
    """
    For each relationship in relationship_dict, check if both nodes exist in the graph
    based on node attributes. If they do, create a connection (edge) between them.

    Nodes are matched on their ``id`` attribute, not on their node key. A relationship
    whose two ids are equal never produces an edge (no self-loops), and a relationship
    with an endpoint missing from the graph is skipped.

    :param graph: A NetworkX graph object (anything offering ``nodes(data=True)`` and
        ``add_edge`` works)
    :param relationship_dict: A dictionary mapping a collection id to a list of
        relationship dicts with the keys ``searched_node_id``, ``original_id_for_search``,
        ``score`` and, optionally, ``score_metadata``
    :return: The same graph object, with the new edges added in place
    :raises KeyError: If a relationship lacks one of the three required keys
    """
    # Build the attribute-id -> node-key lookup once instead of rescanning every node
    # for every relationship. add_edge below only links existing nodes, so the lookup
    # stays valid for the whole run.
    node_key_by_attr_id = {}
    for node_key, attrs in graph.nodes(data=True):
        if 'id' in attrs:  # Nodes without an 'id' attribute can never be matched
            # Ids are expected to be unique; should one repeat, the first node seen wins
            node_key_by_attr_id.setdefault(attrs['id'], node_key)

    for relationships in relationship_dict.values():
        for relationship in relationships:
            searched_attr_id = relationship['searched_node_id']
            origin_attr_id = relationship['original_id_for_search']
            score = relationship['score']

            # A node that matched itself is not connected to itself
            if searched_attr_id == origin_attr_id:
                continue
            if searched_attr_id not in node_key_by_attr_id or origin_attr_id not in node_key_by_attr_id:
                continue

            # Both endpoints exist: link them, keeping the similarity score as the weight
            graph.add_edge(
                node_key_by_attr_id[searched_attr_id],
                node_key_by_attr_id[origin_attr_id],
                weight=score,
                score_metadata=relationship.get('score_metadata'),
            )
    return graph
 def graph_ready_output(results, score_threshold=0.9):
    """
    Turn batch-search results into the relationship dictionary that
    ``connect_nodes_in_graph`` consumes.

    :param results: Iterable of 4-tuples ``(uuid, scored_points_list, desc, node_id)``.
        ``scored_points_list`` is a list of lists of scored points, i.e. objects with
        ``id``, ``score`` and ``payload`` attributes
    :param score_threshold: Only hits scoring strictly above this value are kept.
        Defaults to 0.9, the cut-off that used to be hard-coded
    :return: Dict mapping each uuid to a list of relationship dicts with the keys
        ``collection_name_uuid``, ``searched_node_id``, ``score``, ``score_metadata`` and
        ``original_id_for_search``. A uuid with no qualifying hit maps to an empty list
    """
    relationship_dict = {}
    for collection_uuid, scored_points_list, _desc, node_id in results:
        # Make sure the uuid is present even when none of its hits qualifies
        related = relationship_dict.setdefault(collection_uuid, [])
        for scored_points in scored_points_list:  # One inner list per search request
            related.extend(
                {
                    'collection_name_uuid': collection_uuid,
                    'searched_node_id': scored_point.id,
                    'score': scored_point.score,
                    'score_metadata': scored_point.payload,
                    'original_id_for_search': node_id,
                }
                for scored_point in scored_points
                if scored_point.score > score_threshold
            )
    return relationship_dict
 if __name__ == '__main__':
    # Ad-hoc manual run: fetch the graph client for the NETWORKX backend and group
    # its nodes. The vector database client and data arguments are passed as None,
    # and the grouped result is discarded.
    graph_client = get_graph_client(GraphDBType.NETWORKX)
    add_node_connection(graph_client, None, None)
    # db = get_vector_database()
--- a/cognitive_architecture/modules/cognify/graph/add_propositions.py
+++ b/cognitive_architecture/modules/cognify/graph/add_propositions.py
@ -68,7 +68,7 @@ async def add_propositions(G, category_name, subclass_content, layer_description
 async def append_to_graph(layer_graphs, required_layers, G):
    # Generate a UUID for the overall layer
    layer_uuid = uuid.uuid4()
-
+    decomposition_uuids = set()
    # Extract category name from required_layers data
    category_name = required_layers.dict()['label']['type']
@ -84,7 +84,7 @@ async def append_to_graph(layer_graphs, required_layers, G):
            # Generate a UUID for this particular layer decomposition
            layer_decomposition_uuid = uuid.uuid4()
-
+            decomposition_uuids.add(layer_decomposition_uuid)
            # Assuming append_data_to_graph is defined elsewhere and appends data to G
            # You would pass relevant information from knowledge_graph along with other details to this function
            F = await add_propositions(G, category_name, subgroup_name, layer_description, knowledge_graph,
@ -93,7 +93,7 @@ async def append_to_graph(layer_graphs, required_layers, G):
            # Print updated graph for verification (assuming F is the updated NetworkX Graph)
            print("Updated Nodes:", F.graph.nodes(data=True))
-    return F
+    return F, decomposition_uuids
 if __name__ == "__main__":
--- a/cognitive_architecture/modules/cognify/llm/add_node_connection_embeddings.py
+++ b/cognitive_architecture/modules/cognify/llm/add_node_connection_embeddings.py
@ -0,0 +1,38 @@
 import asyncio
 async def process_items(grouped_data, unique_layer_uuids, llm_client):
    """
    Embed every item's description and pair it with each layer other than its own.

    The embedding depends only on the item's description, so it is requested once per
    item and then reused for all of that item's target layers, rather than once per
    (item, target layer) pair.

    :param grouped_data: Dict mapping a group id to a list of item dicts, each with a
        ``description`` and a ``node_id`` key
    :param unique_layer_uuids: Re-iterable collection of layer ids; for each group, every
        id different from the group's own id is a comparison target
    :param llm_client: Object providing the coroutine method
        ``async_get_embedding_with_backoff(text, model)``
    :return: List of ``[target_id, embedding, description, node_id, group_id]`` entries,
        ordered by group, then item, then target layer. Entries of one item share the
        same embedding object
    """
    # One entry per item that has at least one layer to be compared against
    pending = []
    for group_id, items in grouped_data.items():
        # Exclude the item's own layer so that no self comparison is produced
        target_ids = [layer_id for layer_id in unique_layer_uuids if layer_id != group_id]
        if not target_ids:
            # Nothing to compare against, so no embedding is needed for this group
            continue
        pending.extend((target_ids, item, group_id) for item in items)

    # Run all embedding requests concurrently; gather returns results in request order
    embeddings = await asyncio.gather(*(
        llm_client.async_get_embedding_with_backoff(item['description'], "text-embedding-3-large")
        for _, item, _ in pending
    ))

    results_to_check = []  # Results excluding self comparisons
    for (target_ids, item, group_id), embedding in zip(pending, embeddings):
        for target_id in target_ids:
            results_to_check.append([target_id, embedding, item['description'], item['node_id'], group_id])
    return results_to_check
 if __name__ == '__main__':
    # NOTE(review): process_items is a coroutine function with three required
    # parameters; this bare call passes none of them, so running the module
    # directly raises TypeError before any coroutine is created.
    process_items()
--- a/cognitive_architecture/modules/cognify/vector/batch_search.py
+++ b/cognitive_architecture/modules/cognify/vector/batch_search.py
@ -1,7 +1,7 @@
 from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
-async def adapted_qdrant_batch_search(results_to_check, client):
+async def adapted_qdrant_batch_search(results_to_check,vector_client):
    search_results_list = []
    for result in results_to_check:
@ -15,9 +15,10 @@ async def adapted_qdrant_batch_search(results_to_check, client):
        limits = [3] * len(embedding)  # Set a limit of 3 results for this embedding
        try:
-            # Perform the batch search for this id with its embedding
+            #Perform the batch search for this id with its embedding
-            # Assuming qdrant_batch_search function accepts a single embedding and a list of limits
+            #Assuming qdrant_batch_search function accepts a single embedding and a list of limits
-            id_search_results = await client.batch_search(id, [embedding], limits)
+            #qdrant_batch_search
            id_search_results = await vector_client.batch_search(collection_name = id, embeddings= embedding, with_vectors=limits)
            search_results_list.append((id, id_search_results, node_id, target))
        except Exception as e:
            print(f"Error during batch search for ID {id}: {e}")
@ -26,4 +27,6 @@ async def adapted_qdrant_batch_search(results_to_check, client):
    return search_results_list
-client = get_vector_database()
+if __name__ == '__main__':
    client = get_vector_database()
--- a/cognitive_architecture/modules/cognify/vector/load_propositions.py
+++ b/cognitive_architecture/modules/cognify/vector/load_propositions.py
@ -28,6 +28,7 @@ async def upload_embedding(id, metadata, some_embeddings, collection_name, clien
 async def add_propositions(node_descriptions, client):
    for item in node_descriptions:
        print(item['node_id'])
-        await upload_embedding(id = item['node_id'], metadata = {"meta":item['description']}, some_embeddings = get_embeddings(item['description']), collection_name= item['layer_decomposition_uuid'],client= client)
+        embeddings = await get_embeddings(item['description'])
        await upload_embedding(id = item['node_id'], metadata = {"meta":item['description']}, some_embeddings = embeddings[0], collection_name= item['layer_decomposition_uuid'],client= client)