Add utils for graph visualization + classification nodes

Vasilije 2024-03-11 12:41:32 +01:00
parent faf7e6ae59
commit 7e964bcb23
12 changed files with 1487 additions and 1610 deletions

File diff suppressed because one or more lines are too long

View file

View file

@@ -14,8 +14,13 @@ from dotenv import load_dotenv
 import os
 from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import CollectionConfig
 from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
+from cognitive_architecture.modules.cognify.graph.add_classification_nodes import add_classification_nodes
+from cognitive_architecture.modules.cognify.graph.add_node_connections import add_node_connection, graph_ready_output, \
+    connect_nodes_in_graph
 from cognitive_architecture.modules.cognify.graph.add_propositions import append_to_graph
+from cognitive_architecture.modules.cognify.llm.add_node_connection_embeddings import process_items
+from cognitive_architecture.modules.cognify.vector.batch_search import adapted_qdrant_batch_search
 from cognitive_architecture.modules.cognify.vector.load_propositions import add_propositions

 # Load environment variables from .env file
@@ -146,7 +151,7 @@ async def cognify(input_text:str):
     await add_classification_nodes(graph_client, 'Document:doc1', transformed_dict_1)
-    await append_to_graph(layer_1_graph, required_layers_one, graph_client)
+    F, unique_layer_uuids = await append_to_graph(layer_1_graph, required_layers_one, graph_client)

     def extract_node_descriptions(data):
         descriptions = []
@@ -158,10 +163,10 @@ async def cognify(input_text:str):
         return descriptions

     # Extract the node descriptions
-    graph = await graph_client.graph()
+    graph = graph_client.graph
     node_descriptions = extract_node_descriptions(graph.nodes(data=True))
-    unique_layer_uuids = set(node['layer_decomposition_uuid'] for node in node_descriptions)
+    # unique_layer_uuids = set(node['layer_decomposition_uuid'] for node in node_descriptions)
     #
     db = get_vector_database()
@@ -178,10 +183,43 @@ async def cognify(input_text:str):
     for layer in unique_layer_uuids:
         await db.create_collection(layer,collection_config)
-    #to check if it works
+    # #to check if it works
+    #
     await add_propositions(node_descriptions, db)
-    from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import AsyncQdrantClient
+    grouped_data = await add_node_connection(graph_client, db, node_descriptions)
+    llm_client = get_llm_client()
+    relationship_dict = await process_items(grouped_data, unique_layer_uuids, llm_client)
+    results = await adapted_qdrant_batch_search(relationship_dict, db)
+    relationship_d = graph_ready_output(results)
+    CONNECTED_GRAPH = connect_nodes_in_graph(F, relationship_d)
+    return CONNECTED_GRAPH
+    #
+    # grouped_data = {}
+    #
+    # # Iterate through each dictionary in the list
+    # for item in node_descriptions:
+    #     # Get the layer_decomposition_uuid of the current dictionary
+    #     uuid = item['layer_decomposition_uuid']
+    #
+    #     # Check if this uuid is already a key in the grouped_data dictionary
+    #     if uuid not in grouped_data:
+    #         # If not, initialize a new list for this uuid
+    #         grouped_data[uuid] = []
+    #
+    #     # Append the current dictionary to the list corresponding to its uuid
+    #     grouped_data[uuid].append(item)

cognitive_architecture/infrastructure/databases/vector/qdrant/adapter.py
View file

@@ -52,7 +52,7 @@ class QDrantAdapter(VectorDBInterface):
             quantization_config = collection_config.quantization_config
         )

-    async def create_data_points(self, collection_name: str, data_points: List[any]):
+    async def create_data_points(self, collection_name: str, data_points):
         client = self.get_qdrant_client()
         return await client.upload_points(
@@ -96,11 +96,11 @@ class QDrantAdapter(VectorDBInterface):
                 # vector= embedding,
                 limit=3,
                 with_vector=False
-            ) for embedding in embeddings
+            ) for embedding in [embeddings]
         ]
         # Perform batch search with the dynamically generated requests
-        results = client.search_batch(
+        results = await client.search_batch(
             collection_name=collection_name,
             requests=requests
         )
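
Note: on qdrant-client's async client, search_batch is a coroutine, so the added await is required. A minimal sketch of the same call shape outside the adapter, assuming a local Qdrant instance, qdrant-client 1.6+, and an already-populated collection (the name and vector size here are illustrative):

    import asyncio
    from qdrant_client import AsyncQdrantClient, models

    async def demo():
        client = AsyncQdrantClient(url="http://localhost:6333")  # assumed local instance
        request = models.SearchRequest(vector=[0.0] * 3072, limit=3, with_vector=False)
        # search_batch returns one list of ScoredPoint per request
        results = await client.search_batch(collection_name="some_layer", requests=[request])
        print(results)

    asyncio.run(demo())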

View file

@@ -43,7 +43,7 @@ class VectorDBInterface(Protocol):
     async def create_data_points(
         self,
         collection_name: str,
-        data_points: List[any]
+        data_points
     ): raise NotImplementedError

     # @abstractmethod
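
With the annotation loosened, any adapter whose create_data_points takes (collection_name, data_points) now satisfies the Protocol structurally. A minimal conforming fake for tests (InMemoryAdapter is illustrative, not part of this commit):

    from typing import Protocol

    class VectorDBInterface(Protocol):
        async def create_data_points(self, collection_name: str, data_points): ...

    class InMemoryAdapter:
        # matches the Protocol by shape alone
        def __init__(self):
            self.collections = {}

        async def create_data_points(self, collection_name: str, data_points):
            self.collections.setdefault(collection_name, []).extend(data_points)
            return self.collections[collection_name]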

cognitive_architecture/modules/cognify/graph/add_node_connections.py
View file

@@ -0,0 +1,113 @@
+import asyncio
+
+from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
+from cognitive_architecture.shared.data_models import GraphDBType
+
+
+def extract_node_descriptions(data):
+    descriptions = []
+    for node_id, attributes in data:
+        if 'description' in attributes and 'id' in attributes:
+            descriptions.append({'node_id': attributes['id'], 'description': attributes['description'],
+                                 'layer_uuid': attributes['layer_uuid'],
+                                 'layer_decomposition_uuid': attributes['layer_decomposition_uuid']})
+    return descriptions
+
+
+# async so the call site in cognify() can await it alongside the other graph helpers
+async def add_node_connection(graph_client, vector_database_client, data):
+    graph = graph_client.graph
+    node_descriptions = extract_node_descriptions(graph.nodes(data=True))
+    grouped_data = {}
+    # Iterate through each dictionary in the list
+    for item in node_descriptions:
+        # Get the layer_decomposition_uuid of the current dictionary
+        uuid = item['layer_decomposition_uuid']
+        # Check if this uuid is already a key in the grouped_data dictionary
+        if uuid not in grouped_data:
+            # If not, initialize a new list for this uuid
+            grouped_data[uuid] = []
+        # Append the current dictionary to the list corresponding to its uuid
+        grouped_data[uuid].append(item)
+    return grouped_data
+
+
+def connect_nodes_in_graph(graph, relationship_dict):
+    """
+    For each relationship in relationship_dict, check if both nodes exist in the graph based on node attributes.
+    If they do, create a connection (edge) between them.
+
+    :param graph: A NetworkX graph object
+    :param relationship_dict: A dictionary containing relationships between nodes
+    """
+    for id, relationships in relationship_dict.items():
+        for relationship in relationships:
+            searched_node_attr_id = relationship['searched_node_id']
+            print(searched_node_attr_id)
+            score_attr_id = relationship['original_id_for_search']
+            score = relationship['score']
+            # Initialize node keys for both searched_node and score_node
+            searched_node_key, score_node_key = None, None
+            # Find nodes in the graph that match the searched_node_id and score_id from their attributes
+            for node, attrs in graph.nodes(data=True):
+                if 'id' in attrs:  # Ensure there is an 'id' attribute
+                    if attrs['id'] == searched_node_attr_id:
+                        searched_node_key = node
+                    elif attrs['id'] == score_attr_id:
+                        score_node_key = node
+                # If both nodes are found, no need to continue checking other nodes
+                if searched_node_key and score_node_key:
+                    break
+            # Check if both nodes were found in the graph
+            if searched_node_key is not None and score_node_key is not None:
+                print(searched_node_key)
+                print(score_node_key)
+                # If both nodes exist, create an edge between them
+                # You can customize the edge attributes as needed, here we use 'score' as an attribute
+                graph.add_edge(searched_node_key, score_node_key, weight=score,
+                               score_metadata=relationship.get('score_metadata'))
+    return graph
+
+
+def graph_ready_output(results):
+    relationship_dict = {}
+    for result_tuple in results:
+        # Unpack the tuple
+        uuid, scored_points_list, desc, node_id = result_tuple
+        # Ensure there's a list to collect related items for this uuid
+        if uuid not in relationship_dict:
+            relationship_dict[uuid] = []
+        for scored_points in scored_points_list:  # Iterate over the list of ScoredPoint lists
+            for scored_point in scored_points:  # Iterate over each ScoredPoint object
+                if scored_point.score > 0.9:  # Check the score condition
+                    # Append a new dictionary to the list associated with the uuid
+                    relationship_dict[uuid].append({
+                        'collection_name_uuid': uuid,
+                        'searched_node_id': scored_point.id,
+                        'score': scored_point.score,
+                        'score_metadata': scored_point.payload,
+                        'original_id_for_search': node_id,
+                    })
+    return relationship_dict
+
+
+if __name__ == '__main__':
+    graph_client = get_graph_client(GraphDBType.NETWORKX)
+    asyncio.run(add_node_connection(graph_client, None, None))
+    # db = get_vector_database()
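
The new module can be exercised end to end on a toy NetworkX graph. A sketch under assumed inputs: the node attributes follow the shape extract_node_descriptions reads, and rels mimics what graph_ready_output produces:

    import networkx as nx

    g = nx.Graph()
    g.add_node("a", id="n1", description="first", layer_uuid="L0", layer_decomposition_uuid="u1")
    g.add_node("b", id="n2", description="second", layer_uuid="L0", layer_decomposition_uuid="u2")

    print(extract_node_descriptions(g.nodes(data=True)))  # two dicts, one per node

    # hand-built stand-in for graph_ready_output(...) results (score above the 0.9 cut-off)
    rels = {"u1": [{"searched_node_id": "n2", "original_id_for_search": "n1",
                    "score": 0.95, "score_metadata": None}]}
    connected = connect_nodes_in_graph(g, rels)
    print(connected.edges(data=True))  # one 'a'-'b' edge carrying weight=0.95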

cognitive_architecture/modules/cognify/graph/add_propositions.py
View file

@@ -68,7 +68,7 @@ async def add_propositions(G, category_name, subclass_content, layer_description
 async def append_to_graph(layer_graphs, required_layers, G):
     # Generate a UUID for the overall layer
     layer_uuid = uuid.uuid4()
+    decomposition_uuids = set()

     # Extract category name from required_layers data
     category_name = required_layers.dict()['label']['type']
@@ -84,7 +84,7 @@ async def append_to_graph(layer_graphs, required_layers, G):
         # Generate a UUID for this particular layer decomposition
         layer_decomposition_uuid = uuid.uuid4()
+        decomposition_uuids.add(layer_decomposition_uuid)

         # Assuming append_data_to_graph is defined elsewhere and appends data to G
         # You would pass relevant information from knowledge_graph along with other details to this function
         F = await add_propositions(G, category_name, subgroup_name, layer_description, knowledge_graph,
@@ -93,7 +93,7 @@ async def append_to_graph(layer_graphs, required_layers, G):
     # Print updated graph for verification (assuming F is the updated NetworkX Graph)
     print("Updated Nodes:", F.graph.nodes(data=True))
-    return F
+    return F, decomposition_uuids

 if __name__ == "__main__":

cognitive_architecture/modules/cognify/llm/add_node_connection_embeddings.py
View file

@@ -0,0 +1,38 @@
+import asyncio
+
+
+async def process_items(grouped_data, unique_layer_uuids, llm_client):
+    results_to_check = []  # This will hold results excluding self comparisons
+    tasks = []  # List to hold all tasks
+    task_to_info = {}  # Dictionary to map tasks to their corresponding group id and item info
+
+    # Iterate through each group in grouped_data
+    for group_id, items in grouped_data.items():
+        # Filter unique_layer_uuids to exclude the current group_id
+        target_uuids = [uuid for uuid in unique_layer_uuids if uuid != group_id]
+        # Process each item in the group
+        for item in items:
+            # For each target UUID, create an async task for the item's embedding retrieval
+            for target_id in target_uuids:
+                task = asyncio.create_task(
+                    llm_client.async_get_embedding_with_backoff(item['description'], "text-embedding-3-large"))
+                tasks.append(task)
+                # Map the task to the target id, item's node_id, and description for later retrieval
+                task_to_info[task] = (target_id, item['node_id'], group_id, item['description'])
+
+    # Await all tasks to complete and gather results
+    results = await asyncio.gather(*tasks)
+
+    # Process the results, associating them with their target id, node id, and description
+    for task, embedding in zip(tasks, results):
+        target_id, node_id, group_id, description = task_to_info[task]
+        results_to_check.append([target_id, embedding, description, node_id, group_id])
+
+    return results_to_check
+
+
+if __name__ == '__main__':
+    # Smoke test: with no grouped data there is nothing to embed, so this returns []
+    asyncio.run(process_items({}, set(), None))
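
process_items can also be smoke-tested without a real LLM backend by stubbing the one method it calls; StubLLMClient and the dummy vectors below are illustrative, not part of this commit:

    class StubLLMClient:
        # stand-in for get_llm_client(); returns a fixed-size dummy embedding
        async def async_get_embedding_with_backoff(self, text, model):
            return [0.0] * 8

    grouped = {"u1": [{"node_id": "n1", "description": "first node"}],
               "u2": [{"node_id": "n2", "description": "second node"}]}
    out = asyncio.run(process_items(grouped, {"u1", "u2"}, StubLLMClient()))
    # each row: [target_id, embedding, description, node_id, group_id]
    print(len(out))  # 2: each item is embedded once per foreign layer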

cognitive_architecture/modules/cognify/vector/batch_search.py
View file

@@ -1,7 +1,7 @@
 from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database

-async def adapted_qdrant_batch_search(results_to_check, client):
+async def adapted_qdrant_batch_search(results_to_check, vector_client):
     search_results_list = []

     for result in results_to_check:
@@ -15,9 +15,10 @@ async def adapted_qdrant_batch_search(results_to_check, client):
         limits = [3] * len(embedding)  # Set a limit of 3 results for this embedding
         try:
-            # Perform the batch search for this id with its embedding
-            # Assuming qdrant_batch_search function accepts a single embedding and a list of limits
-            id_search_results = await client.batch_search(id, [embedding], limits)
+            # Perform the batch search for this id with its embedding
+            # Assuming qdrant_batch_search function accepts a single embedding and a list of limits
+            # qdrant_batch_search
+            id_search_results = await vector_client.batch_search(collection_name=id, embeddings=embedding, with_vectors=limits)
             search_results_list.append((id, id_search_results, node_id, target))
         except Exception as e:
             print(f"Error during batch search for ID {id}: {e}")
@@ -26,4 +27,6 @@ async def adapted_qdrant_batch_search(results_to_check, client):
     return search_results_list

-client = get_vector_database()
+
+if __name__ == '__main__':
+    client = get_vector_database()

cognitive_architecture/modules/cognify/vector/load_propositions.py
View file

@@ -28,6 +28,7 @@ async def upload_embedding(id, metadata, some_embeddings, collection_name, client):
 async def add_propositions(node_descriptions, client):
     for item in node_descriptions:
         print(item['node_id'])
-        await upload_embedding(id=item['node_id'], metadata={"meta": item['description']}, some_embeddings=get_embeddings(item['description']), collection_name=item['layer_decomposition_uuid'], client=client)
+        embeddings = await get_embeddings(item['description'])
+        await upload_embedding(id=item['node_id'], metadata={"meta": item['description']}, some_embeddings=embeddings[0], collection_name=item['layer_decomposition_uuid'], client=client)
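
The reworked call assumes get_embeddings is a coroutine returning a list of vectors, hence the embeddings[0]. For reference, a sketch of the input shape add_propositions expects (values illustrative; db comes from get_vector_database()):

    node_descriptions = [{"node_id": "n1", "description": "first node",
                          "layer_uuid": "L0", "layer_decomposition_uuid": "u1"}]
    await add_propositions(node_descriptions, db)  # run inside an async context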