Add utils for graph visualization + classification nodes
This commit is contained in:
parent
faf7e6ae59
commit
7e964bcb23
12 changed files with 1487 additions and 1610 deletions
2866
Demo_graph.ipynb
2866
Demo_graph.ipynb
File diff suppressed because one or more lines are too long
0
cognitive_architecture/api/__init__.py
Normal file
0
cognitive_architecture/api/__init__.py
Normal file
0
cognitive_architecture/api/v1/__init__.py
Normal file
0
cognitive_architecture/api/v1/__init__.py
Normal file
|
|
@ -14,8 +14,13 @@ from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import CollectionConfig
|
from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import CollectionConfig
|
||||||
|
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
from cognitive_architecture.modules.cognify.graph.add_classification_nodes import add_classification_nodes
|
from cognitive_architecture.modules.cognify.graph.add_classification_nodes import add_classification_nodes
|
||||||
|
from cognitive_architecture.modules.cognify.graph.add_node_connections import add_node_connection, graph_ready_output, \
|
||||||
|
connect_nodes_in_graph
|
||||||
from cognitive_architecture.modules.cognify.graph.add_propositions import append_to_graph
|
from cognitive_architecture.modules.cognify.graph.add_propositions import append_to_graph
|
||||||
|
from cognitive_architecture.modules.cognify.llm.add_node_connection_embeddings import process_items
|
||||||
|
from cognitive_architecture.modules.cognify.vector.batch_search import adapted_qdrant_batch_search
|
||||||
from cognitive_architecture.modules.cognify.vector.load_propositions import add_propositions
|
from cognitive_architecture.modules.cognify.vector.load_propositions import add_propositions
|
||||||
|
|
||||||
# Load environment variables from .env file
|
# Load environment variables from .env file
|
||||||
|
|
@ -146,7 +151,7 @@ async def cognify(input_text:str):
|
||||||
|
|
||||||
await add_classification_nodes(graph_client, 'Document:doc1', transformed_dict_1)
|
await add_classification_nodes(graph_client, 'Document:doc1', transformed_dict_1)
|
||||||
|
|
||||||
await append_to_graph(layer_1_graph, required_layers_one, graph_client)
|
F, unique_layer_uuids = await append_to_graph(layer_1_graph, required_layers_one, graph_client)
|
||||||
|
|
||||||
def extract_node_descriptions(data):
|
def extract_node_descriptions(data):
|
||||||
descriptions = []
|
descriptions = []
|
||||||
|
|
@ -158,10 +163,10 @@ async def cognify(input_text:str):
|
||||||
return descriptions
|
return descriptions
|
||||||
|
|
||||||
# Extract the node descriptions
|
# Extract the node descriptions
|
||||||
graph = await graph_client.graph()
|
graph = graph_client.graph
|
||||||
node_descriptions = extract_node_descriptions(graph.nodes(data=True))
|
node_descriptions = extract_node_descriptions(graph.nodes(data=True))
|
||||||
unique_layer_uuids = set(node['layer_decomposition_uuid'] for node in node_descriptions)
|
# unique_layer_uuids = set(node['layer_decomposition_uuid'] for node in node_descriptions)
|
||||||
|
#
|
||||||
db = get_vector_database()
|
db = get_vector_database()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -178,10 +183,43 @@ async def cognify(input_text:str):
|
||||||
for layer in unique_layer_uuids:
|
for layer in unique_layer_uuids:
|
||||||
await db.create_collection(layer,collection_config)
|
await db.create_collection(layer,collection_config)
|
||||||
|
|
||||||
#to check if it works
|
# #to check if it works
|
||||||
|
#
|
||||||
await add_propositions(node_descriptions, db)
|
await add_propositions(node_descriptions, db)
|
||||||
|
|
||||||
|
from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import AsyncQdrantClient
|
||||||
|
|
||||||
|
grouped_data = await add_node_connection(graph_client, db, node_descriptions)
|
||||||
|
|
||||||
|
llm_client = get_llm_client()
|
||||||
|
|
||||||
|
relationship_dict = await process_items(grouped_data, unique_layer_uuids, llm_client)
|
||||||
|
|
||||||
|
results = await adapted_qdrant_batch_search(relationship_dict, db)
|
||||||
|
relationship_d = graph_ready_output(results)
|
||||||
|
|
||||||
|
CONNECTED_GRAPH = connect_nodes_in_graph(F, relationship_d)
|
||||||
|
return CONNECTED_GRAPH
|
||||||
|
|
||||||
|
#
|
||||||
|
# grouped_data = {}
|
||||||
|
#
|
||||||
|
# # Iterate through each dictionary in the list
|
||||||
|
# for item in node_descriptions:
|
||||||
|
# # Get the layer_decomposition_uuid of the current dictionary
|
||||||
|
# uuid = item['layer_decomposition_uuid']
|
||||||
|
#
|
||||||
|
# # Check if this uuid is already a key in the grouped_data dictionary
|
||||||
|
# if uuid not in grouped_data:
|
||||||
|
# # If not, initialize a new list for this uuid
|
||||||
|
# grouped_data[uuid] = []
|
||||||
|
#
|
||||||
|
# # Append the current dictionary to the list corresponding to its uuid
|
||||||
|
# grouped_data[uuid].append(item)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ class QDrantAdapter(VectorDBInterface):
|
||||||
quantization_config = collection_config.quantization_config
|
quantization_config = collection_config.quantization_config
|
||||||
)
|
)
|
||||||
|
|
||||||
async def create_data_points(self, collection_name: str, data_points: List[any]):
|
async def create_data_points(self, collection_name: str, data_points):
|
||||||
client = self.get_qdrant_client()
|
client = self.get_qdrant_client()
|
||||||
|
|
||||||
return await client.upload_points(
|
return await client.upload_points(
|
||||||
|
|
@ -96,11 +96,11 @@ class QDrantAdapter(VectorDBInterface):
|
||||||
# vector= embedding,
|
# vector= embedding,
|
||||||
limit=3,
|
limit=3,
|
||||||
with_vector=False
|
with_vector=False
|
||||||
) for embedding in embeddings
|
) for embedding in [embeddings]
|
||||||
]
|
]
|
||||||
|
|
||||||
# Perform batch search with the dynamically generated requests
|
# Perform batch search with the dynamically generated requests
|
||||||
results = client.search_batch(
|
results = await client.search_batch(
|
||||||
collection_name=collection_name,
|
collection_name=collection_name,
|
||||||
requests=requests
|
requests=requests
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ class VectorDBInterface(Protocol):
|
||||||
async def create_data_points(
|
async def create_data_points(
|
||||||
self,
|
self,
|
||||||
collection_name: str,
|
collection_name: str,
|
||||||
data_points: List[any]
|
data_points
|
||||||
): raise NotImplementedError
|
): raise NotImplementedError
|
||||||
|
|
||||||
# @abstractmethod
|
# @abstractmethod
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,113 @@
|
||||||
|
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
|
from cognitive_architecture.shared.data_models import GraphDBType
|
||||||
|
|
||||||
|
|
||||||
|
def extract_node_descriptions(data):
|
||||||
|
descriptions = []
|
||||||
|
for node_id, attributes in data:
|
||||||
|
if 'description' in attributes and 'id' in attributes:
|
||||||
|
descriptions.append({'node_id': attributes['id'], 'description': attributes['description'], 'layer_uuid': attributes['layer_uuid'], 'layer_decomposition_uuid': attributes['layer_decomposition_uuid'] })
|
||||||
|
return descriptions
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def add_node_connection(graph_client, vector_database_client, data):
|
||||||
|
|
||||||
|
graph = graph_client.graph
|
||||||
|
node_descriptions = extract_node_descriptions(graph.nodes(data=True))
|
||||||
|
|
||||||
|
|
||||||
|
grouped_data = {}
|
||||||
|
|
||||||
|
# Iterate through each dictionary in the list
|
||||||
|
for item in node_descriptions:
|
||||||
|
# Get the layer_decomposition_uuid of the current dictionary
|
||||||
|
uuid = item['layer_decomposition_uuid']
|
||||||
|
|
||||||
|
# Check if this uuid is already a key in the grouped_data dictionary
|
||||||
|
if uuid not in grouped_data:
|
||||||
|
# If not, initialize a new list for this uuid
|
||||||
|
grouped_data[uuid] = []
|
||||||
|
|
||||||
|
# Append the current dictionary to the list corresponding to its uuid
|
||||||
|
grouped_data[uuid].append(item)
|
||||||
|
|
||||||
|
return grouped_data
|
||||||
|
|
||||||
|
|
||||||
|
def connect_nodes_in_graph(graph, relationship_dict):
|
||||||
|
"""
|
||||||
|
For each relationship in relationship_dict, check if both nodes exist in the graph based on node attributes.
|
||||||
|
If they do, create a connection (edge) between them.
|
||||||
|
|
||||||
|
:param graph: A NetworkX graph object
|
||||||
|
:param relationship_dict: A dictionary containing relationships between nodes
|
||||||
|
"""
|
||||||
|
for id, relationships in relationship_dict.items():
|
||||||
|
for relationship in relationships:
|
||||||
|
searched_node_attr_id = relationship['searched_node_id']
|
||||||
|
print(searched_node_attr_id)
|
||||||
|
score_attr_id = relationship['original_id_for_search']
|
||||||
|
score = relationship['score']
|
||||||
|
|
||||||
|
# Initialize node keys for both searched_node and score_node
|
||||||
|
searched_node_key, score_node_key = None, None
|
||||||
|
|
||||||
|
# Find nodes in the graph that match the searched_node_id and score_id from their attributes
|
||||||
|
for node, attrs in graph.nodes(data=True):
|
||||||
|
if 'id' in attrs: # Ensure there is an 'id' attribute
|
||||||
|
if attrs['id'] == searched_node_attr_id:
|
||||||
|
searched_node_key = node
|
||||||
|
elif attrs['id'] == score_attr_id:
|
||||||
|
score_node_key = node
|
||||||
|
|
||||||
|
# If both nodes are found, no need to continue checking other nodes
|
||||||
|
if searched_node_key and score_node_key:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Check if both nodes were found in the graph
|
||||||
|
if searched_node_key is not None and score_node_key is not None:
|
||||||
|
print(searched_node_key)
|
||||||
|
print(score_node_key)
|
||||||
|
# If both nodes exist, create an edge between them
|
||||||
|
# You can customize the edge attributes as needed, here we use 'score' as an attribute
|
||||||
|
graph.add_edge(searched_node_key, score_node_key, weight=score,
|
||||||
|
score_metadata=relationship.get('score_metadata'))
|
||||||
|
|
||||||
|
return graph
|
||||||
|
def graph_ready_output(results):
|
||||||
|
relationship_dict = {}
|
||||||
|
|
||||||
|
for result_tuple in results:
|
||||||
|
|
||||||
|
uuid, scored_points_list, desc, node_id = result_tuple
|
||||||
|
# Unpack the tuple
|
||||||
|
|
||||||
|
# Ensure there's a list to collect related items for this uuid
|
||||||
|
if uuid not in relationship_dict:
|
||||||
|
relationship_dict[uuid] = []
|
||||||
|
|
||||||
|
for scored_points in scored_points_list: # Iterate over the list of ScoredPoint lists
|
||||||
|
for scored_point in scored_points: # Iterate over each ScoredPoint object
|
||||||
|
if scored_point.score > 0.9: # Check the score condition
|
||||||
|
# Append a new dictionary to the list associated with the uuid
|
||||||
|
relationship_dict[uuid].append({
|
||||||
|
'collection_name_uuid': uuid,
|
||||||
|
'searched_node_id': scored_point.id,
|
||||||
|
'score': scored_point.score,
|
||||||
|
'score_metadata': scored_point.payload,
|
||||||
|
'original_id_for_search': node_id,
|
||||||
|
})
|
||||||
|
return relationship_dict
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
graph_client = get_graph_client(GraphDBType.NETWORKX)
|
||||||
|
add_node_connection(graph_client, None, None)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# db = get_vector_database()
|
||||||
|
|
@ -68,7 +68,7 @@ async def add_propositions(G, category_name, subclass_content, layer_description
|
||||||
async def append_to_graph(layer_graphs, required_layers, G):
|
async def append_to_graph(layer_graphs, required_layers, G):
|
||||||
# Generate a UUID for the overall layer
|
# Generate a UUID for the overall layer
|
||||||
layer_uuid = uuid.uuid4()
|
layer_uuid = uuid.uuid4()
|
||||||
|
decomposition_uuids = set()
|
||||||
# Extract category name from required_layers data
|
# Extract category name from required_layers data
|
||||||
category_name = required_layers.dict()['label']['type']
|
category_name = required_layers.dict()['label']['type']
|
||||||
|
|
||||||
|
|
@ -84,7 +84,7 @@ async def append_to_graph(layer_graphs, required_layers, G):
|
||||||
|
|
||||||
# Generate a UUID for this particular layer decomposition
|
# Generate a UUID for this particular layer decomposition
|
||||||
layer_decomposition_uuid = uuid.uuid4()
|
layer_decomposition_uuid = uuid.uuid4()
|
||||||
|
decomposition_uuids.add(layer_decomposition_uuid)
|
||||||
# Assuming append_data_to_graph is defined elsewhere and appends data to G
|
# Assuming append_data_to_graph is defined elsewhere and appends data to G
|
||||||
# You would pass relevant information from knowledge_graph along with other details to this function
|
# You would pass relevant information from knowledge_graph along with other details to this function
|
||||||
F = await add_propositions(G, category_name, subgroup_name, layer_description, knowledge_graph,
|
F = await add_propositions(G, category_name, subgroup_name, layer_description, knowledge_graph,
|
||||||
|
|
@ -93,7 +93,7 @@ async def append_to_graph(layer_graphs, required_layers, G):
|
||||||
# Print updated graph for verification (assuming F is the updated NetworkX Graph)
|
# Print updated graph for verification (assuming F is the updated NetworkX Graph)
|
||||||
print("Updated Nodes:", F.graph.nodes(data=True))
|
print("Updated Nodes:", F.graph.nodes(data=True))
|
||||||
|
|
||||||
return F
|
return F, decomposition_uuids
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
|
async def process_items(grouped_data, unique_layer_uuids, llm_client):
|
||||||
|
results_to_check = [] # This will hold results excluding self comparisons
|
||||||
|
tasks = [] # List to hold all tasks
|
||||||
|
task_to_info = {} # Dictionary to map tasks to their corresponding group id and item info
|
||||||
|
|
||||||
|
# Iterate through each group in grouped_data
|
||||||
|
for group_id, items in grouped_data.items():
|
||||||
|
# Filter unique_layer_uuids to exclude the current group_id
|
||||||
|
target_uuids = [uuid for uuid in unique_layer_uuids if uuid != group_id]
|
||||||
|
|
||||||
|
# Process each item in the group
|
||||||
|
for item in items:
|
||||||
|
# For each target UUID, create an async task for the item's embedding retrieval
|
||||||
|
for target_id in target_uuids:
|
||||||
|
task = asyncio.create_task(
|
||||||
|
llm_client.async_get_embedding_with_backoff(item['description'], "text-embedding-3-large"))
|
||||||
|
tasks.append(task)
|
||||||
|
# Map the task to the target id, item's node_id, and description for later retrieval
|
||||||
|
task_to_info[task] = (target_id, item['node_id'], group_id, item['description'])
|
||||||
|
|
||||||
|
# Await all tasks to complete and gather results
|
||||||
|
results = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
# Process the results, associating them with their target id, node id, and description
|
||||||
|
for task, embedding in zip(tasks, results):
|
||||||
|
target_id, node_id, group_id, description = task_to_info[task]
|
||||||
|
results_to_check.append([target_id, embedding, description, node_id, group_id])
|
||||||
|
|
||||||
|
return results_to_check
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
process_items()
|
||||||
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
|
|
||||||
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
|
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
|
||||||
|
|
||||||
async def adapted_qdrant_batch_search(results_to_check, client):
|
async def adapted_qdrant_batch_search(results_to_check,vector_client):
|
||||||
search_results_list = []
|
search_results_list = []
|
||||||
|
|
||||||
for result in results_to_check:
|
for result in results_to_check:
|
||||||
|
|
@ -15,9 +15,10 @@ async def adapted_qdrant_batch_search(results_to_check, client):
|
||||||
limits = [3] * len(embedding) # Set a limit of 3 results for this embedding
|
limits = [3] * len(embedding) # Set a limit of 3 results for this embedding
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Perform the batch search for this id with its embedding
|
#Perform the batch search for this id with its embedding
|
||||||
# Assuming qdrant_batch_search function accepts a single embedding and a list of limits
|
#Assuming qdrant_batch_search function accepts a single embedding and a list of limits
|
||||||
id_search_results = await client.batch_search(id, [embedding], limits)
|
#qdrant_batch_search
|
||||||
|
id_search_results = await vector_client.batch_search(collection_name = id, embeddings= embedding, with_vectors=limits)
|
||||||
search_results_list.append((id, id_search_results, node_id, target))
|
search_results_list.append((id, id_search_results, node_id, target))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error during batch search for ID {id}: {e}")
|
print(f"Error during batch search for ID {id}: {e}")
|
||||||
|
|
@ -26,4 +27,6 @@ async def adapted_qdrant_batch_search(results_to_check, client):
|
||||||
return search_results_list
|
return search_results_list
|
||||||
|
|
||||||
|
|
||||||
client = get_vector_database()
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
client = get_vector_database()
|
||||||
|
|
@ -28,6 +28,7 @@ async def upload_embedding(id, metadata, some_embeddings, collection_name, clien
|
||||||
async def add_propositions(node_descriptions, client):
|
async def add_propositions(node_descriptions, client):
|
||||||
for item in node_descriptions:
|
for item in node_descriptions:
|
||||||
print(item['node_id'])
|
print(item['node_id'])
|
||||||
await upload_embedding(id = item['node_id'], metadata = {"meta":item['description']}, some_embeddings = get_embeddings(item['description']), collection_name= item['layer_decomposition_uuid'],client= client)
|
embeddings = await get_embeddings(item['description'])
|
||||||
|
await upload_embedding(id = item['node_id'], metadata = {"meta":item['description']}, some_embeddings = embeddings[0], collection_name= item['layer_decomposition_uuid'],client= client)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue