import asyncio import logging from datetime import datetime from langchain.prompts import ChatPromptTemplate import json from langchain.document_loaders import TextLoader from langchain.document_loaders import DirectoryLoader from langchain.chains import create_extraction_chain from langchain.chat_models import ChatOpenAI import re from dotenv import load_dotenv import os from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import CollectionConfig from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client from cognitive_architecture.modules.cognify.graph.add_classification_nodes import add_classification_nodes from cognitive_architecture.modules.cognify.graph.add_node_connections import add_node_connection, graph_ready_output, \ connect_nodes_in_graph from cognitive_architecture.modules.cognify.graph.add_propositions import append_to_graph from cognitive_architecture.modules.cognify.llm.add_node_connection_embeddings import process_items from cognitive_architecture.modules.cognify.vector.batch_search import adapted_qdrant_batch_search from cognitive_architecture.modules.cognify.vector.load_propositions import add_propositions from cognitive_architecture.utils import render_graph # Load environment variables from .env file load_dotenv() import instructor from openai import OpenAI aclient = instructor.patch(OpenAI()) from typing import Optional, List, Type from pydantic import BaseModel, Field from cognitive_architecture.modules.cognify.llm.classify_content import classify_into_categories from cognitive_architecture.modules.cognify.llm.content_to_cog_layers import content_to_cog_layers from cognitive_architecture.modules.cognify.llm.content_to_propositions import generate_graph from cognitive_architecture.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer from cognitive_architecture.modules.cognify.graph.create import create_semantic_graph from typing import Optional, Any from pydantic import BaseModel from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client from cognitive_architecture.shared.data_models import GraphDBType, DefaultGraphModel, Document, DocumentType, Category, Relationship, UserProperties, UserLocation from qdrant_client import models from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database async def cognify(input_text:str): """This function is responsible for the cognitive processing of the content.""" # Load the content from the text file input_article_one= input_text # Classify the content into categories required_layers_one = await classify_into_categories(input_article_one, "classify_content.txt", DefaultContentPrediction) def transform_dict(original): # Extract the first subclass from the list (assuming there could be more) subclass_enum = original['label']['subclass'][0] # The data type is derived from 'type' and converted to lowercase data_type = original['label']['type'].lower() # The context name is the name of the Enum member (e.g., 'NEWS_STORIES') context_name = subclass_enum.name.replace('_', ' ').title() # The layer name is the value of the Enum member (e.g., 'News stories and blog posts') layer_name = subclass_enum.value # Construct the new dictionary new_dict = { 'data_type': data_type, 'context_name': data_type.upper(), # llm context classification 'layer_name': layer_name # llm layer classification } return new_dict # Transform the original dictionary transformed_dict_1 = transform_dict(required_layers_one.dict()) cognitive_layers_one = await content_to_cog_layers("generate_cog_layers.txt", transformed_dict_1, response_model=DefaultCognitiveLayer) cognitive_layers_one = [layer_subgroup.name for layer_subgroup in cognitive_layers_one.cognitive_layers] async def generate_graphs_for_all_layers(text_input: str, layers: List[str], response_model: Type[BaseModel]): tasks = [generate_graph(text_input, "generate_graph_prompt.txt", {'layer': layer}, response_model) for layer in layers] return await asyncio.gather(*tasks) # Execute the async function and print results for each set of layers async def async_graph_per_layer(text_input: str, cognitive_layers: List[str]): graphs = await generate_graphs_for_all_layers(text_input, cognitive_layers, KnowledgeGraph) # for layer, graph in zip(cognitive_layers, graphs): # print(f"{layer}: {graph}") return graphs # Run the async function for each set of cognitive layers layer_1_graph = await async_graph_per_layer(input_article_one, cognitive_layers_one) print(layer_1_graph) graph_client = get_graph_client(GraphDBType.NETWORKX) # Define a GraphModel instance with example data graph_model_instance = DefaultGraphModel( id="user123", documents=[ Document( doc_id="doc1", title="Document 1", summary="Summary of Document 1", content_id="content_id_for_doc1", doc_type=DocumentType(type_id="PDF", description="Portable Document Format"), categories=[ Category(category_id="finance", name="Finance", default_relationship=Relationship(type="belongs_to")), Category(category_id="tech", name="Technology", default_relationship=Relationship(type="belongs_to")) ], default_relationship=Relationship(type='has_document') ), Document( doc_id="doc2", title="Document 2", summary="Summary of Document 2", content_id="content_id_for_doc2", doc_type=DocumentType(type_id="TXT", description="Text File"), categories=[ Category(category_id="health", name="Health", default_relationship=Relationship(type="belongs_to")), Category(category_id="wellness", name="Wellness", default_relationship=Relationship(type="belongs_to")) ], default_relationship=Relationship(type='has_document') ) ], user_properties=UserProperties( custom_properties={"age": "30"}, location=UserLocation(location_id="ny", description="New York", default_relationship=Relationship(type='located_in')) ), default_fields={ 'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S") } ) G = await create_semantic_graph(graph_model_instance, graph_client) await add_classification_nodes(graph_client, 'Document:doc1', transformed_dict_1) F, unique_layer_uuids = await append_to_graph(layer_1_graph, required_layers_one, graph_client) def extract_node_descriptions(data): descriptions = [] for node_id, attributes in data: if 'description' in attributes and 'id' in attributes: descriptions.append({'node_id': attributes['id'], 'description': attributes['description'], 'layer_uuid': attributes['layer_uuid'], 'layer_decomposition_uuid': attributes['layer_decomposition_uuid']}) return descriptions # Extract the node descriptions graph = graph_client.graph node_descriptions = extract_node_descriptions(graph.nodes(data=True)) # unique_layer_uuids = set(node['layer_decomposition_uuid'] for node in node_descriptions) # db = get_vector_database() collection_config = CollectionConfig( vector_config={ 'content': models.VectorParams( distance=models.Distance.COSINE, size=3072 ) }, # Set other configs as needed ) for layer in unique_layer_uuids: await db.create_collection(layer,collection_config) # #to check if it works # await add_propositions(node_descriptions, db) from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import AsyncQdrantClient grouped_data = await add_node_connection(graph_client, db, node_descriptions) llm_client = get_llm_client() relationship_dict = await process_items(grouped_data, unique_layer_uuids, llm_client) results = await adapted_qdrant_batch_search(relationship_dict, db) relationship_d = graph_ready_output(results) CONNECTED_GRAPH = connect_nodes_in_graph(F, relationship_d) out = await render_graph(CONNECTED_GRAPH, graph_type='networkx') print(out) return CONNECTED_GRAPH # # grouped_data = {} # # # Iterate through each dictionary in the list # for item in node_descriptions: # # Get the layer_decomposition_uuid of the current dictionary # uuid = item['layer_decomposition_uuid'] # # # Check if this uuid is already a key in the grouped_data dictionary # if uuid not in grouped_data: # # If not, initialize a new list for this uuid # grouped_data[uuid] = [] # # # Append the current dictionary to the list corresponding to its uuid # grouped_data[uuid].append(item) if __name__ == "__main__": asyncio.run(cognify("The quick brown fox jumps over the lazy dog"))