Add utils for graph visualization + classification nodes
This commit is contained in:
parent
faf7e6ae59
commit
7e964bcb23
12 changed files with 1487 additions and 1610 deletions
2866
Demo_graph.ipynb
2866
Demo_graph.ipynb
File diff suppressed because one or more lines are too long
0
cognitive_architecture/api/__init__.py
Normal file
0
cognitive_architecture/api/__init__.py
Normal file
0
cognitive_architecture/api/v1/__init__.py
Normal file
0
cognitive_architecture/api/v1/__init__.py
Normal file
|
|
@ -14,8 +14,13 @@ from dotenv import load_dotenv
|
|||
import os
|
||||
|
||||
from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import CollectionConfig
|
||||
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
|
||||
from cognitive_architecture.modules.cognify.graph.add_classification_nodes import add_classification_nodes
|
||||
from cognitive_architecture.modules.cognify.graph.add_node_connections import add_node_connection, graph_ready_output, \
|
||||
connect_nodes_in_graph
|
||||
from cognitive_architecture.modules.cognify.graph.add_propositions import append_to_graph
|
||||
from cognitive_architecture.modules.cognify.llm.add_node_connection_embeddings import process_items
|
||||
from cognitive_architecture.modules.cognify.vector.batch_search import adapted_qdrant_batch_search
|
||||
from cognitive_architecture.modules.cognify.vector.load_propositions import add_propositions
|
||||
|
||||
# Load environment variables from .env file
|
||||
|
|
@ -146,7 +151,7 @@ async def cognify(input_text:str):
|
|||
|
||||
await add_classification_nodes(graph_client, 'Document:doc1', transformed_dict_1)
|
||||
|
||||
await append_to_graph(layer_1_graph, required_layers_one, graph_client)
|
||||
F, unique_layer_uuids = await append_to_graph(layer_1_graph, required_layers_one, graph_client)
|
||||
|
||||
def extract_node_descriptions(data):
|
||||
descriptions = []
|
||||
|
|
@ -158,10 +163,10 @@ async def cognify(input_text:str):
|
|||
return descriptions
|
||||
|
||||
# Extract the node descriptions
|
||||
graph = await graph_client.graph()
|
||||
graph = graph_client.graph
|
||||
node_descriptions = extract_node_descriptions(graph.nodes(data=True))
|
||||
unique_layer_uuids = set(node['layer_decomposition_uuid'] for node in node_descriptions)
|
||||
|
||||
# unique_layer_uuids = set(node['layer_decomposition_uuid'] for node in node_descriptions)
|
||||
#
|
||||
db = get_vector_database()
|
||||
|
||||
|
||||
|
|
@ -178,10 +183,43 @@ async def cognify(input_text:str):
|
|||
for layer in unique_layer_uuids:
|
||||
await db.create_collection(layer,collection_config)
|
||||
|
||||
#to check if it works
|
||||
|
||||
# #to check if it works
|
||||
#
|
||||
await add_propositions(node_descriptions, db)
|
||||
|
||||
from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import AsyncQdrantClient
|
||||
|
||||
grouped_data = await add_node_connection(graph_client, db, node_descriptions)
|
||||
|
||||
llm_client = get_llm_client()
|
||||
|
||||
relationship_dict = await process_items(grouped_data, unique_layer_uuids, llm_client)
|
||||
|
||||
results = await adapted_qdrant_batch_search(relationship_dict, db)
|
||||
relationship_d = graph_ready_output(results)
|
||||
|
||||
CONNECTED_GRAPH = connect_nodes_in_graph(F, relationship_d)
|
||||
return CONNECTED_GRAPH
|
||||
|
||||
#
|
||||
# grouped_data = {}
|
||||
#
|
||||
# # Iterate through each dictionary in the list
|
||||
# for item in node_descriptions:
|
||||
# # Get the layer_decomposition_uuid of the current dictionary
|
||||
# uuid = item['layer_decomposition_uuid']
|
||||
#
|
||||
# # Check if this uuid is already a key in the grouped_data dictionary
|
||||
# if uuid not in grouped_data:
|
||||
# # If not, initialize a new list for this uuid
|
||||
# grouped_data[uuid] = []
|
||||
#
|
||||
# # Append the current dictionary to the list corresponding to its uuid
|
||||
# grouped_data[uuid].append(item)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class QDrantAdapter(VectorDBInterface):
|
|||
quantization_config = collection_config.quantization_config
|
||||
)
|
||||
|
||||
async def create_data_points(self, collection_name: str, data_points: List[any]):
|
||||
async def create_data_points(self, collection_name: str, data_points):
|
||||
client = self.get_qdrant_client()
|
||||
|
||||
return await client.upload_points(
|
||||
|
|
@ -96,11 +96,11 @@ class QDrantAdapter(VectorDBInterface):
|
|||
# vector= embedding,
|
||||
limit=3,
|
||||
with_vector=False
|
||||
) for embedding in embeddings
|
||||
) for embedding in [embeddings]
|
||||
]
|
||||
|
||||
# Perform batch search with the dynamically generated requests
|
||||
results = client.search_batch(
|
||||
results = await client.search_batch(
|
||||
collection_name=collection_name,
|
||||
requests=requests
|
||||
)
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ class VectorDBInterface(Protocol):
|
|||
async def create_data_points(
|
||||
self,
|
||||
collection_name: str,
|
||||
data_points: List[any]
|
||||
data_points
|
||||
): raise NotImplementedError
|
||||
|
||||
# @abstractmethod
|
||||
|
|
|
|||
|
|
@ -0,0 +1,113 @@
|
|||
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||
from cognitive_architecture.shared.data_models import GraphDBType
|
||||
|
||||
|
||||
def extract_node_descriptions(data):
    """Collect description records from graph nodes.

    :param data: iterable of ``(node_key, attributes)`` pairs, as produced
        by ``networkx.Graph.nodes(data=True)``.
    :return: list of dicts with keys ``node_id``, ``description``,
        ``layer_uuid`` and ``layer_decomposition_uuid`` — one per node that
        carries all four source attributes.
    """
    required = ('id', 'description', 'layer_uuid', 'layer_decomposition_uuid')
    descriptions = []
    for _node_key, attributes in data:
        # Guard on every attribute we read: the previous check covered only
        # 'id' and 'description' but indexed the two layer keys
        # unconditionally, raising KeyError on partially-attributed nodes.
        if all(key in attributes for key in required):
            descriptions.append({
                'node_id': attributes['id'],
                'description': attributes['description'],
                'layer_uuid': attributes['layer_uuid'],
                'layer_decomposition_uuid': attributes['layer_decomposition_uuid'],
            })
    return descriptions
|
||||
|
||||
|
||||
|
||||
|
||||
def add_node_connection(graph_client, vector_database_client, data):
    """Group the graph's node descriptions by layer_decomposition_uuid.

    :param graph_client: client exposing the NetworkX graph via ``.graph``.
    :param vector_database_client: unused here; kept for interface parity.
    :param data: unused here; descriptions are re-read from the live graph.
    :return: dict mapping each layer_decomposition_uuid to the list of
        description records belonging to that decomposition.
    """
    # NOTE(review): vector_database_client and data are both ignored — the
    # descriptions are always pulled from the client's graph. Confirm intended.
    node_descriptions = extract_node_descriptions(
        graph_client.graph.nodes(data=True))

    grouped_data = {}
    for record in node_descriptions:
        # setdefault replaces the explicit "initialize list on first
        # sight" branch of the original grouping loop.
        grouped_data.setdefault(record['layer_decomposition_uuid'], []).append(record)

    return grouped_data
|
||||
|
||||
|
||||
def connect_nodes_in_graph(graph, relationship_dict):
    """
    For each relationship in relationship_dict, check if both endpoint nodes
    exist in the graph (matched on their 'id' attribute). If they do, create
    an edge between them weighted by the relationship score.

    :param graph: A NetworkX graph object
    :param relationship_dict: dict mapping a collection uuid to a list of
        relationship records, each carrying 'searched_node_id',
        'original_id_for_search', 'score' and an optional 'score_metadata'.
    :return: the same graph object, with the new edges added in place.
    """
    # Build the attribute-id -> node-key lookup once instead of rescanning
    # every node for every relationship (O(V + R) instead of O(V * R)).
    # NOTE(review): assumes node 'id' attributes are unique — confirm.
    key_by_attr_id = {}
    for node_key, attrs in graph.nodes(data=True):
        if 'id' in attrs and attrs['id'] not in key_by_attr_id:
            key_by_attr_id[attrs['id']] = node_key

    for relationships in relationship_dict.values():
        for relationship in relationships:
            searched_attr_id = relationship['searched_node_id']
            original_attr_id = relationship['original_id_for_search']

            # The original if/elif scan could never bind the same id to both
            # endpoints, so a self-match yields no edge; preserve that.
            if searched_attr_id == original_attr_id:
                continue

            searched_key = key_by_attr_id.get(searched_attr_id)
            original_key = key_by_attr_id.get(original_attr_id)

            # Only connect when both endpoints were found in the graph.
            if searched_key is not None and original_key is not None:
                graph.add_edge(searched_key, original_key,
                               weight=relationship['score'],
                               score_metadata=relationship.get('score_metadata'))

    return graph
|
||||
def graph_ready_output(results):
    """Convert batch-search result tuples into a relationship mapping.

    :param results: iterable of ``(uuid, scored_points_list, description,
        node_id)`` tuples, where ``scored_points_list`` is a list of lists of
        ScoredPoint objects (each exposing ``.id``, ``.score``, ``.payload``).
    :return: dict mapping each uuid to the relationship records whose score
        is strictly greater than 0.9.
    """
    relationship_dict = {}

    for uuid, scored_points_list, _description, node_id in results:
        # Every uuid gets a bucket, even when no point clears the threshold.
        bucket = relationship_dict.setdefault(uuid, [])

        for scored_points in scored_points_list:
            # Keep only confident matches (score strictly above 0.9).
            bucket.extend(
                {
                    'collection_name_uuid': uuid,
                    'searched_node_id': point.id,
                    'score': point.score,
                    'score_metadata': point.payload,
                    'original_id_for_search': node_id,
                }
                for point in scored_points
                if point.score > 0.9
            )

    return relationship_dict
|
||||
|
||||
|
||||
|
||||
|
||||
# Manual smoke test: build a NetworkX-backed graph client and run the
# grouping step against it (the vector-client and data arguments are
# unused by add_node_connection, so None is passed for both).
if __name__ == '__main__':
    graph_client = get_graph_client(GraphDBType.NETWORKX)
    add_node_connection(graph_client, None, None)
|
||||
|
||||
|
||||
|
||||
# db = get_vector_database()
|
||||
|
|
@ -68,7 +68,7 @@ async def add_propositions(G, category_name, subclass_content, layer_description
|
|||
async def append_to_graph(layer_graphs, required_layers, G):
|
||||
# Generate a UUID for the overall layer
|
||||
layer_uuid = uuid.uuid4()
|
||||
|
||||
decomposition_uuids = set()
|
||||
# Extract category name from required_layers data
|
||||
category_name = required_layers.dict()['label']['type']
|
||||
|
||||
|
|
@ -84,7 +84,7 @@ async def append_to_graph(layer_graphs, required_layers, G):
|
|||
|
||||
# Generate a UUID for this particular layer decomposition
|
||||
layer_decomposition_uuid = uuid.uuid4()
|
||||
|
||||
decomposition_uuids.add(layer_decomposition_uuid)
|
||||
# Assuming append_data_to_graph is defined elsewhere and appends data to G
|
||||
# You would pass relevant information from knowledge_graph along with other details to this function
|
||||
F = await add_propositions(G, category_name, subgroup_name, layer_description, knowledge_graph,
|
||||
|
|
@ -93,7 +93,7 @@ async def append_to_graph(layer_graphs, required_layers, G):
|
|||
# Print updated graph for verification (assuming F is the updated NetworkX Graph)
|
||||
print("Updated Nodes:", F.graph.nodes(data=True))
|
||||
|
||||
return F
|
||||
return F, decomposition_uuids
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -0,0 +1,38 @@
|
|||
import asyncio
|
||||
|
||||
|
||||
async def process_items(grouped_data, unique_layer_uuids, llm_client):
    """Embed every node description and fan the embeddings out to the other
    layers, excluding self-comparisons.

    :param grouped_data: dict mapping a group id (layer_decomposition_uuid)
        to a list of items, each carrying 'description' and 'node_id'.
    :param unique_layer_uuids: iterable of all layer uuids; each item is
        paired with every uuid except its own group's.
    :param llm_client: client exposing
        ``async_get_embedding_with_backoff(text, model)``.
    :return: list of ``[target_id, embedding, description, node_id,
        group_id]`` entries.
    """
    # One embedding task per (group, item): the embedding depends only on the
    # description, so the previous per-target scheduling issued identical
    # duplicate LLM calls for every target uuid.
    entries = []  # (group_id, item, target_uuids) aligned with tasks
    tasks = []

    for group_id, items in grouped_data.items():
        # Exclude the item's own group from the comparison targets.
        target_uuids = [uuid for uuid in unique_layer_uuids if uuid != group_id]
        if not target_uuids:
            continue  # nothing to compare against — skip the LLM call too

        for item in items:
            entries.append((group_id, item, target_uuids))
            tasks.append(asyncio.create_task(
                llm_client.async_get_embedding_with_backoff(
                    item['description'], "text-embedding-3-large")))

    # Await all embedding tasks concurrently.
    embeddings = await asyncio.gather(*tasks)

    # Fan each embedding out to its targets, preserving the original
    # group -> item -> target result ordering.
    results_to_check = []
    for (group_id, item, target_uuids), embedding in zip(entries, embeddings):
        for target_id in target_uuids:
            results_to_check.append(
                [target_id, embedding, item['description'], item['node_id'], group_id])

    return results_to_check
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # NOTE(review): broken smoke test — process_items is async and requires
    # (grouped_data, unique_layer_uuids, llm_client); calling it bare like
    # this raises TypeError and never awaits the coroutine. Confirm intent.
    process_items()
|
||||
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
|
||||
|
||||
async def adapted_qdrant_batch_search(results_to_check, client):
|
||||
async def adapted_qdrant_batch_search(results_to_check,vector_client):
|
||||
search_results_list = []
|
||||
|
||||
for result in results_to_check:
|
||||
|
|
@ -15,9 +15,10 @@ async def adapted_qdrant_batch_search(results_to_check, client):
|
|||
limits = [3] * len(embedding) # Set a limit of 3 results for this embedding
|
||||
|
||||
try:
|
||||
# Perform the batch search for this id with its embedding
|
||||
# Assuming qdrant_batch_search function accepts a single embedding and a list of limits
|
||||
id_search_results = await client.batch_search(id, [embedding], limits)
|
||||
#Perform the batch search for this id with its embedding
|
||||
#Assuming qdrant_batch_search function accepts a single embedding and a list of limits
|
||||
#qdrant_batch_search
|
||||
id_search_results = await vector_client.batch_search(collection_name = id, embeddings= embedding, with_vectors=limits)
|
||||
search_results_list.append((id, id_search_results, node_id, target))
|
||||
except Exception as e:
|
||||
print(f"Error during batch search for ID {id}: {e}")
|
||||
|
|
@ -26,4 +27,6 @@ async def adapted_qdrant_batch_search(results_to_check, client):
|
|||
return search_results_list
|
||||
|
||||
|
||||
client = get_vector_database()
|
||||
if __name__ == '__main__':
|
||||
|
||||
client = get_vector_database()
|
||||
|
|
@ -28,6 +28,7 @@ async def upload_embedding(id, metadata, some_embeddings, collection_name, clien
|
|||
async def add_propositions(node_descriptions, client):
|
||||
for item in node_descriptions:
|
||||
print(item['node_id'])
|
||||
await upload_embedding(id = item['node_id'], metadata = {"meta":item['description']}, some_embeddings = get_embeddings(item['description']), collection_name= item['layer_decomposition_uuid'],client= client)
|
||||
embeddings = await get_embeddings(item['description'])
|
||||
await upload_embedding(id = item['node_id'], metadata = {"meta":item['description']}, some_embeddings = embeddings[0], collection_name= item['layer_decomposition_uuid'],client= client)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue