Merge remote-tracking branch 'origin/feat/COG-24-add-qdrant' into feat/COG-24-add-qdrant

This commit is contained in:
Vasilije 2024-03-13 15:33:18 +01:00
commit 801069b4c0
13 changed files with 221 additions and 488 deletions

File diff suppressed because one or more lines are too long

View file

@@ -27,7 +27,7 @@ async def add_standalone(
promises = []
for data_item in data:
promises.append(add(data_item, dataset_id, dataset_name))
promises.append(add_standalone(data_item, dataset_id, dataset_name))
results = await asyncio.gather(*promises)
@@ -36,7 +36,7 @@ async def add_standalone(
if is_data_path(data):
with open(data.replace("file://", ""), "rb") as file:
return await add(file, dataset_id, dataset_name)
return await add_standalone(file, dataset_id, dataset_name)
classified_data = ingestion.classify(data)
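
The two hunks above make add_standalone self-recursive; a condensed sketch of the resulting dispatch, not the full function (is_data_path and ingestion come from the surrounding module):

import asyncio

async def add_standalone(data, dataset_id=None, dataset_name=None):
    # A list fans out into parallel add_standalone calls.
    if isinstance(data, list):
        promises = [add_standalone(item, dataset_id, dataset_name) for item in data]
        return await asyncio.gather(*promises)
    # A "file://" path is opened and re-dispatched as a file handle.
    if is_data_path(data):
        with open(data.replace("file://", ""), "rb") as file:
            return await add_standalone(file, dataset_id, dataset_name)
    # Base case: classify and ingest a single data item.
    classified_data = ingestion.classify(data)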

View file

@@ -21,39 +21,45 @@ from cognitive_architecture.modules.cognify.llm.classify_content import classify
from cognitive_architecture.modules.cognify.llm.content_to_cog_layers import content_to_cog_layers
from cognitive_architecture.modules.cognify.llm.generate_graph import generate_graph
from cognitive_architecture.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer
from cognitive_architecture.modules.cognify.graph.create import create_semantic_graph
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognitive_architecture.shared.data_models import GraphDBType
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
from cognitive_architecture.infrastructure.databases.relational import DuckDBAdapter
from cognitive_architecture.modules.cognify.graph.add_document_node import add_document_node
from cognitive_architecture.modules.cognify.graph.initialize_graph import initialize_graph
config = Config()
config.load()
aclient = instructor.patch(OpenAI())
USER_ID = "default_user"
async def cognify(dataset_name: str):
"""This function is responsible for the cognitive processing of the content."""
db = DuckDBAdapter()
files_metadata = db.get_files_metadata(dataset_name)
files = list(files_metadata["file_path"].values())
awaitables = []
for file in files:
with open(file, "rb") as file:
await initialize_graph(USER_ID)
for file_metadata in files_metadata:
with open(file_metadata["file_path"], "rb") as file:
elements = partition_pdf(file = file, strategy = "fast")
text = "\n".join(map(lambda element: clean(element.text), elements))
awaitables.append(process_text(text))
awaitables.append(process_text(text, file_metadata))
graphs = await asyncio.gather(*awaitables)
return graphs[0]
async def process_text(input_text: str):
classified_categories = None
async def process_text(input_text: str, file_metadata: dict):
print(f"Processing document ({file_metadata['id']})")
classified_categories = []
try:
# Classify the content into categories
@@ -62,13 +68,17 @@ async def process_text(input_text: str):
"classify_content.txt",
DefaultContentPrediction
)
file_metadata["categories"] = list(map(lambda category: category["layer_name"], classified_categories))
except Exception as e:
print(e)
raise e
await add_document_node(f"DefaultGraphModel:{USER_ID}", file_metadata)
print(f"Document ({file_metadata['id']}) categorized: {file_metadata['categories']}")
cognitive_layers = await content_to_cog_layers(
"generate_cog_layers.txt",
classified_categories,
classified_categories[0],
response_model = DefaultCognitiveLayer
)
@@ -84,73 +94,17 @@ async def process_text(input_text: str):
layer_graphs = await generate_graph_per_layer(input_text, cognitive_layers)
# print(layer_graphs)
# ADD SUMMARY
# ADD CATEGORIES
print(f"Document ({file_metadata['id']}) layer graphs created")
# Define a GraphModel instance with example data
# graph_model_instance = DefaultGraphModel(
# id="user123",
# documents=[
# Document(
# doc_id = "doc1",
# title = "Document 1",
# summary = "Summary of Document 1",
# content_id = "content_id_for_doc1",
# doc_type = DocumentType(type_id = "PDF", description = "Portable Document Format"),
# categories = [
# Category(
# category_id = "finance",
# name = "Finance",
# default_relationship = Relationship(type = "belongs_to")
# ),
# Category(
# category_id = "tech",
# name = "Technology",
# default_relationship = Relationship(type = "belongs_to")
# )
# ],
# default_relationship = Relationship(type="has_document")
# ),
# Document(
# doc_id = "doc2",
# title = "Document 2",
# summary = "Summary of Document 2",
# content_id = "content_id_for_doc2",
# doc_type = DocumentType(type_id = "TXT", description = "Text File"),
# categories = [
# Category(
# category_id = "health",
# name = "Health",
# default_relationship = Relationship(type="belongs_to")
# ),
# Category(
# category_id = "wellness",
# name = "Wellness",
# default_relationship = Relationship(type="belongs_to")
# )
# ],
# default_relationship = Relationship(type = "has_document")
# )
# ],
# user_properties = UserProperties(
# custom_properties = {"age": "30"},
# location = UserLocation(
# location_id = "ny",
# description = "New York",
# default_relationship = Relationship(type = "located_in"))
# ),
# default_fields={
# "created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
# "updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# }
# )
# G = await create_semantic_graph(graph_model_instance)
await add_classification_nodes(f"DOCUMENT:{file_metadata['id']}", classified_categories[0])
unique_layer_uuids = await append_to_graph(layer_graphs, classified_categories[0])
print(f"Document ({file_metadata['id']}) layers connected")
graph_client = get_graph_client(GraphDBType.NETWORKX)
# G = await create_semantic_graph(graph_model_instance, graph_client)
await add_classification_nodes("Document:doc1", classified_categories)
unique_layer_uuids = await append_to_graph(layer_graphs, classified_categories, graph_client)
await graph_client.load_graph_from_file()
@@ -169,7 +123,6 @@ async def process_text(input_text: str):
size = 3072
)
},
# Set other configs as needed
)
try:
@@ -179,25 +132,14 @@ async def process_text(input_text: str):
except Exception as e:
print(e)
# from qdrant_client import QdrantClient
# qdrant = QdrantClient(
# url=os.getenv("QDRANT_URL"),
# api_key=os.getenv("QDRANT_API_KEY"))
#
# collections_response = qdrant.http.collections_api.get_collections()
# collections = collections_response.result.collections
# print(collections)
await add_propositions(node_descriptions)
grouped_data = await add_node_connection(node_descriptions)
# print("we are here, grouped_data", grouped_data)
llm_client = get_llm_client()
relationship_dict = await process_items(grouped_data, unique_layer_uuids, llm_client)
# print("we are here", relationship_dict[0])
results = await adapted_qdrant_batch_search(relationship_dict, db)
@@ -208,23 +150,9 @@ async def process_text(input_text: str):
connect_nodes_in_graph(graph, relationship_d)
return graph
print(f"Document ({file_metadata['id']}) processed")
#
# grouped_data = {}
#
# # Iterate through each dictionary in the list
# for item in node_descriptions:
# # Get the layer_decomposition_uuid of the current dictionary
# uuid = item["layer_decomposition_uuid"]
#
# # Check if this uuid is already a key in the grouped_data dictionary
# if uuid not in grouped_data:
# # If not, initialize a new list for this uuid
# grouped_data[uuid] = []
#
# # Append the current dictionary to the list corresponding to its uuid
# grouped_data[uuid].append(item)
return graph
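
The size = 3072 hunk above configures a named vector space, presumably sized for OpenAI's text-embedding-3-large. A hedged sketch of the equivalent qdrant-client setup; the collection name and the vector name "content" are assumptions, not from the diff:

import os
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

qdrant = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY"),
)
qdrant.recreate_collection(
    collection_name="cognitive_layers",  # assumed name
    vectors_config={
        # One named 3072-dimensional vector space, matching size = 3072 above.
        "content": VectorParams(size=3072, distance=Distance.COSINE),
    },
)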

View file

@@ -91,7 +91,7 @@ class NetworXAdapter(GraphDBInterface):
# async def create(self, user_id, custom_user_properties=None, required_layers=None, default_fields=None, existing_graph=None):
# """Asynchronously create or update a user content graph based on given parameters."""
# # Assume required_layers is a dictionary-like object; use more robust validation in production
# category_name = required_layers['context_name']
# category_name = required_layers['data_type']
# subgroup_names = [required_layers['layer_name']]
#
# # Construct the additional_categories structure

View file

@@ -9,14 +9,14 @@ class DuckDBAdapter():
self.db_client = duckdb.connect(db_location)
def get_datasets(self):
tables = self.db_client.sql("SELECT DISTINCT schema_name FROM duckdb_tables();").to_df().to_dict()
tables = self.db_client.sql("SELECT DISTINCT schema_name FROM duckdb_tables();").to_df().to_dict("list")
return list(
filter(
lambda table_name: table_name.endswith('staging') is False,
tables["schema_name"].values()
tables["schema_name"]
)
)
def get_files_metadata(self, dataset_name: str):
return self.db_client.sql(f"SELECT * FROM {dataset_name}.file_metadata;").to_df().to_dict()
return self.db_client.sql(f"SELECT * FROM {dataset_name}.file_metadata;").to_df().to_dict("records")
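
The orientation arguments are the substance of this change; a quick pandas illustration of what each caller now receives:

import pandas as pd

df = pd.DataFrame({"schema_name": ["main", "main_staging"]})
df.to_dict()           # {'schema_name': {0: 'main', 1: 'main_staging'}} — keyed by row index
df.to_dict("list")     # {'schema_name': ['main', 'main_staging']} — plain column lists
df.to_dict("records")  # [{'schema_name': 'main'}, {'schema_name': 'main_staging'}] — one dict per row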

View file

@@ -2,25 +2,25 @@
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType
async def add_classification_nodes(graph_id, classification_data):
async def add_classification_nodes(document_id, classification_data):
graph_client = get_graph_client(GraphDBType.NETWORKX)
await graph_client.load_graph_from_file()
context = classification_data["context_name"]
layer = classification_data["layer_name"]
data_type = classification_data["data_type"]
layer_name = classification_data["layer_name"]
# Create the layer classification node ID using the context_name
layer_classification_node_id = f"LLM_LAYER_CLASSIFICATION:{context}:{graph_id}"
# Create the layer classification node ID
layer_classification_node_id = f"LLM_LAYER_CLASSIFICATION:{data_type}:{document_id}"
# Add the node to the graph, unpacking the node data from the dictionary
await graph_client.add_node(layer_classification_node_id, **classification_data)
# Link this node to the corresponding document node
await graph_client.add_edge(graph_id, layer_classification_node_id, relationship = "classified_as")
await graph_client.add_edge(document_id, layer_classification_node_id, relationship = "classified_as")
# Create the detailed classification node ID using the context_name
detailed_classification_node_id = f"LLM_CLASSIFICATION:LAYER:{layer}:{graph_id}"
# Create the detailed classification node ID
detailed_classification_node_id = f"LLM_CLASSIFICATION:LAYER:{layer_name}:{document_id}"
# Add the detailed classification node, reusing the same node data
await graph_client.add_node(detailed_classification_node_id, **classification_data)
@@ -29,22 +29,3 @@ async def add_classification_nodes(graph_id, classification_data):
await graph_client.add_edge(layer_classification_node_id, detailed_classification_node_id, relationship = "contains_analysis")
return True
# if __name__ == "__main__":
# import asyncio
# # Assuming all necessary imports and GraphDBType, get_graph_client, Document, DocumentType, etc. are defined
# # Initialize the graph client
# graph_client = get_graph_client(GraphDBType.NETWORKX)
# G = asyncio.run(add_classification_nodes(graph_client, "Document:doc1", {"data_type": "text",
# "context_name": "TEXT",
# "layer_name": "Articles, essays, and reports"}))
# from cognitive_architecture.utils import render_graph
# ff = asyncio.run( render_graph(G.graph, graph_type='networkx'))
# print(ff)
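
A worked example of the two node IDs this function produces (values invented):

document_id = "DOCUMENT:doc1"
classification_data = {"data_type": "text", "layer_name": "Articles, essays, and reports"}

# Linked from the document with a "classified_as" edge:
layer_node_id = f"LLM_LAYER_CLASSIFICATION:{classification_data['data_type']}:{document_id}"
# -> "LLM_LAYER_CLASSIFICATION:text:DOCUMENT:doc1"

# Linked from the layer node with a "contains_analysis" edge:
detailed_node_id = f"LLM_CLASSIFICATION:LAYER:{classification_data['layer_name']}:{document_id}"
# -> "LLM_CLASSIFICATION:LAYER:Articles, essays, and reports:DOCUMENT:doc1"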

View file

@@ -0,0 +1,28 @@
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognitive_architecture.shared.data_models import GraphDBType, Document, DocumentType, Category, Relationship
from .create import add_node_and_edge
def create_category(category_name: str):
return Category(
category_id = category_name.lower(),
name = category_name,
default_relationship = Relationship(type = "belongs_to")
)
async def add_document_node(parent_id, document_data):
graph_client = get_graph_client(GraphDBType.NETWORKX)
await graph_client.load_graph_from_file()
document_id = f"DOCUMENT:{document_data['id']}"
document = Document(
doc_id = document_id,
title = document_data["name"],
doc_type = DocumentType(type_id = "PDF", description = "Portable Document Format"),
categories = list(map(create_category, document_data["categories"])) if "categories" in document_data else [],
)
document_dict = document.model_dump()
relationship = Relationship(type = "has_document").model_dump()
await add_node_and_edge(graph_client, parent_id, document_id, document_dict, relationship)
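
A hedged usage sketch; the metadata fields mirror what cognify.py passes in from DuckDB's file_metadata records, with invented values:

import asyncio

file_metadata = {
    "id": "doc1",                             # invented id
    "name": "Document 1",                     # becomes the Document title
    "categories": ["Finance", "Technology"],  # set earlier by classification
}
asyncio.run(add_document_node("DefaultGraphModel:default_user", file_metadata))
# Adds node "DOCUMENT:doc1" and a has_document edge from the user's root node.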

View file

@@ -1,6 +1,6 @@
from networkx import Graph
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognitive_architecture.shared.data_models import GraphDBType
from networkx import Graph
async def extract_node_descriptions(data):
@@ -25,7 +25,6 @@ async def add_node_connection(node_descriptions):
return grouped_data
def connect_nodes_in_graph(graph: Graph, relationship_dict: dict) -> Graph:
"""
For each relationship in relationship_dict, check if both nodes exist in the graph based on node attributes.
@@ -37,7 +36,6 @@ def connect_nodes_in_graph(graph: Graph, relationship_dict: dict) -> Graph:
for id, relationships in relationship_dict.items():
for relationship in relationships:
searched_node_attr_id = relationship['searched_node_id']
print(searched_node_attr_id)
score_attr_id = relationship['original_id_for_search']
score = relationship['score']
@@ -58,8 +56,6 @@ def connect_nodes_in_graph(graph: Graph, relationship_dict: dict) -> Graph:
# Check if both nodes were found in the graph
if searched_node_key is not None and score_node_key is not None:
print(searched_node_key)
print(score_node_key)
# If both nodes exist, create an edge between them
# You can customize the edge attributes as needed, here we use 'score' as an attribute
graph.add_edge(searched_node_key, score_node_key, weight=score,
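
connect_nodes_in_graph matches nodes by stored attributes rather than by node key; a minimal sketch of that lookup-then-link pattern (attribute names follow the snippet above, ids and score invented):

import networkx as nx

graph = nx.Graph()
graph.add_node("desc-a - layer - decomp - uuid3", unique_id="uuid3")
graph.add_node("desc-b - layer - decomp - uuid6", unique_id="uuid6")

def find_node_by_attribute(graph: nx.Graph, attribute: str, value: str):
    # Node keys are long composite strings, so lookups go through attributes.
    for key, data in graph.nodes(data=True):
        if data.get(attribute) == value:
            return key
    return None

searched_node_key = find_node_by_attribute(graph, "unique_id", "uuid3")
score_node_key = find_node_by_attribute(graph, "unique_id", "uuid6")
if searched_node_key is not None and score_node_key is not None:
    # The edge weight carries the vector-search similarity score.
    graph.add_edge(searched_node_key, score_node_key, weight=0.87)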

View file

@@ -2,24 +2,29 @@
import uuid
import json
from datetime import datetime
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType
async def add_propositions(graph_client, category_name, subclass_content, layer_description, new_data, layer_uuid,
layer_decomposition_uuid):
async def add_propositions(
data_type,
layer_name,
layer_description,
new_data,
layer_uuid,
layer_decomposition_uuid
):
""" Add nodes and edges to the graph for the given LLM knowledge graph and the layer"""
graph_client = get_graph_client(GraphDBType.NETWORKX)
# Find the node ID for the subclass within the category
await graph_client.load_graph_from_file()
subclass_node_id = None
for node, data in graph_client.graph.nodes(data=True):
if subclass_content in node:
subclass_node_id = node
layer_node_id = None
for node_id, data in graph_client.graph.nodes(data = True):
if layer_name in node_id:
layer_node_id = node_id
print(subclass_node_id)
if not subclass_node_id:
print(f"Subclass '{subclass_content}' under category '{category_name}' not found in the graph.")
if not layer_node_id:
print(f"Subclass '{layer_name}' under category '{data_type}' not found in the graph.")
return graph_client
# Mapping from old node IDs to new node IDs
@@ -28,19 +33,24 @@ async def add_propositions(graph_client, category_name, subclass_content, layer_
# Add nodes from the Pydantic object
for node in new_data.nodes:
unique_node_id = uuid.uuid4()
new_node_id = f"{node.description} - {str(layer_uuid)} - {str(layer_decomposition_uuid)} - {str(unique_node_id)}"
await graph_client.add_node(new_node_id,
created_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
updated_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
description=node.description,
category=node.category,
memory_type=node.memory_type,
layer_uuid=str(layer_uuid),
layer_description=str(layer_description),
layer_decomposition_uuid=str(layer_decomposition_uuid),
unique_id=str(unique_node_id),
type='detail')
await graph_client.add_edge(subclass_node_id, new_node_id, relationship='detail')
await graph_client.add_node(
new_node_id,
created_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
updated_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
description=node.description,
category=node.category,
memory_type=node.memory_type,
layer_uuid=str(layer_uuid),
layer_description=str(layer_description),
layer_decomposition_uuid=str(layer_decomposition_uuid),
unique_id=str(unique_node_id),
type='detail'
)
await graph_client.add_edge(layer_node_id, new_node_id, relationship='detail')
# Store the mapping from old node ID to new node ID
node_id_mapping[node.id] = new_node_id
@@ -56,18 +66,16 @@ async def add_propositions(graph_client, category_name, subclass_content, layer_
else:
print(f"Could not find mapping for edge from {edge.source} to {edge.target}")
return graph_client
async def append_to_graph(layer_graphs, required_layers, graph_client):
async def append_to_graph(layer_graphs, required_layers):
# Generate a UUID for the overall layer
layer_uuid = uuid.uuid4()
decomposition_uuids = set()
# Extract category name from required_layers data
category_name = required_layers["data_type"]
data_type = required_layers["data_type"]
# Extract subgroup name from required_layers data
# Assuming there's always at least one subclass and we're taking the first
subgroup_name = required_layers["layer_name"]
# Assuming there's always at least one layer and we're taking the first
layer_name = required_layers["layer_name"]
for layer_ind in layer_graphs:
@@ -77,14 +85,19 @@ async def append_to_graph(layer_graphs, required_layers, graph_client):
# Generate a UUID for this particular layer decomposition
layer_decomposition_uuid = uuid.uuid4()
decomposition_uuids.add(layer_decomposition_uuid)
# Assuming append_data_to_graph is defined elsewhere and appends data to graph_client
# You would pass relevant information from knowledge_graph along with other details to this function
await add_propositions(graph_client, category_name, subgroup_name, layer_description, knowledge_graph,
layer_uuid, layer_decomposition_uuid)
# Print updated graph for verification (assuming F is the updated NetworkX Graph)
print("Updated Nodes:", graph_client.graph.nodes(data=True))
await add_propositions(
data_type,
layer_name,
layer_description,
knowledge_graph,
layer_uuid,
layer_decomposition_uuid
)
return decomposition_uuids
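
The composite node IDs built in add_propositions pack four pieces into one string; a hedged illustration (the description is invented):

import uuid

layer_uuid = uuid.uuid4()                # shared by everything in one append_to_graph call
layer_decomposition_uuid = uuid.uuid4()  # one per layer graph, collected and returned
unique_node_id = uuid.uuid4()            # one per proposition node
new_node_id = f"Revenue grew 12% - {layer_uuid} - {layer_decomposition_uuid} - {unique_node_id}"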

View file

@@ -1,13 +1,12 @@
""" This module is responsible for creating a semantic graph """
from datetime import datetime
from typing import Optional, Any
from pydantic import BaseModel
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognitive_architecture.shared.data_models import GraphDBType, DefaultGraphModel, Document, DocumentType, Category, Relationship, UserProperties, UserLocation
from cognitive_architecture.shared.data_models import GraphDBType
async def generate_node_id(instance: BaseModel) -> str:
for field in ['id', 'doc_id', 'location_id', 'type_id']:
for field in ["id", "doc_id", "location_id", "type_id"]:
if hasattr(instance, field):
return f"{instance.__class__.__name__}:{getattr(instance, field)}"
return f"{instance.__class__.__name__}:default"
@@ -19,100 +18,100 @@ async def add_node_and_edge(client, parent_id: Optional[str], node_id: str, node
await client.add_edge(parent_id, node_id, **relationship_data)
async def process_attribute(G, parent_id: Optional[str], attribute: str, value: Any):
async def process_attribute(graph_client, parent_id: Optional[str], attribute: str, value: Any):
if isinstance(value, BaseModel):
node_id = await generate_node_id(value)
node_data = value.dict(exclude={'default_relationship'})
node_data = value.dict(exclude={"default_relationship"})
# Use the specified default relationship for the edge between the parent node and the current node
relationship_data = value.default_relationship.dict() if hasattr(value, 'default_relationship') else {}
await add_node_and_edge(G, parent_id, node_id, node_data, relationship_data)
relationship_data = value.default_relationship.dict() if hasattr(value, "default_relationship") else {}
await add_node_and_edge(graph_client, parent_id, node_id, node_data, relationship_data)
# Recursively process nested attributes to ensure all nodes and relationships are added to the graph
for sub_attr, sub_val in value.__dict__.items(): # Access attributes and their values directly
await process_attribute(G, node_id, sub_attr, sub_val)
await process_attribute(graph_client, node_id, sub_attr, sub_val)
elif isinstance(value, list) and all(isinstance(item, BaseModel) for item in value):
# For lists of BaseModel instances, process each item in the list
for item in value:
await process_attribute(G, parent_id, attribute, item)
async def create_dynamic(graph_model, client) :
await client.load_graph_from_file()
await process_attribute(graph_client, parent_id, attribute, item)
async def create_dynamic(graph_model) :
root_id = await generate_node_id(graph_model)
node_data = graph_model.dict(exclude = {"default_relationship", "id"})
print(node_data)
await client.add_node(root_id, **node_data)
graph_client = get_graph_client(GraphDBType.NETWORKX)
await graph_client.add_node(root_id, **node_data)
for attribute_name, attribute_value in graph_model:
await process_attribute(client, root_id, attribute_name, attribute_value)
await process_attribute(graph_client, root_id, attribute_name, attribute_value)
return client
return graph_client
async def create_semantic_graph(graph_model_instance, graph_client):
await graph_client.load_graph_from_file()
async def create_semantic_graph(graph_model_instance):
# Dynamic graph creation based on the provided graph model instance
graph = await create_dynamic(graph_model_instance, graph_client)
# Example of adding a node and saving the graph can be demonstrated in the __main__ section or in tests
graph = await create_dynamic(graph_model_instance)
return graph
if __name__ == "__main__":
import asyncio
# Assuming all necessary imports and GraphDBType, get_graph_client, Document, DocumentType, etc. are defined
# Initialize the graph client
graph_client = get_graph_client(GraphDBType.NETWORKX)
# if __name__ == "__main__":
# import asyncio
# Define a GraphModel instance with example data
graph_model_instance = DefaultGraphModel(
id="user123",
documents=[
Document(
doc_id="doc1",
title="Document 1",
summary="Summary of Document 1",
content_id="content_id_for_doc1",
doc_type=DocumentType(type_id="PDF", description="Portable Document Format"),
categories=[
Category(category_id="finance", name="Finance", default_relationship=Relationship(type="belongs_to")),
Category(category_id="tech", name="Technology", default_relationship=Relationship(type="belongs_to"))
],
default_relationship=Relationship(type="has_document")
),
Document(
doc_id="doc2",
title="Document 2",
summary="Summary of Document 2",
content_id="content_id_for_doc2",
doc_type=DocumentType(type_id="TXT", description="Text File"),
categories=[
Category(category_id="health", name="Health", default_relationship=Relationship(type="belongs_to")),
Category(category_id="wellness", name="Wellness", default_relationship=Relationship(type="belongs_to"))
],
default_relationship=Relationship(type="has_document")
)
],
user_properties=UserProperties(
custom_properties={"age": "30"},
location=UserLocation(location_id="ny", description="New York", default_relationship=Relationship(type="located_in"))
),
default_fields={
"created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}
)
# # Assuming all necessary imports and GraphDBType, get_graph_client, Document, DocumentType, etc. are defined
# Run the graph creation asynchronously
G = asyncio.run(create_semantic_graph(graph_model_instance, graph_client))
# # Initialize the graph client
# graph_client = get_graph_client(GraphDBType.NETWORKX)
# Optionally, here you can add more nodes, edges, or perform other operations on the graph G
# # Define a GraphModel instance with example data
# graph_model_instance = DefaultGraphModel(
# id="user123",
# documents=[
# Document(
# doc_id="doc1",
# title="Document 1",
# summary="Summary of Document 1",
# content_id="content_id_for_doc1",
# doc_type=DocumentType(type_id="PDF", description="Portable Document Format"),
# categories=[
# Category(category_id="finance", name="Finance", default_relationship=Relationship(type="belongs_to")),
# Category(category_id="tech", name="Technology", default_relationship=Relationship(type="belongs_to"))
# ],
# default_relationship=Relationship(type="has_document")
# ),
# Document(
# doc_id="doc2",
# title="Document 2",
# summary="Summary of Document 2",
# content_id="content_id_for_doc2",
# doc_type=DocumentType(type_id="TXT", description="Text File"),
# categories=[
# Category(category_id="health", name="Health", default_relationship=Relationship(type="belongs_to")),
# Category(category_id="wellness", name="Wellness", default_relationship=Relationship(type="belongs_to"))
# ],
# default_relationship=Relationship(type="has_document")
# )
# ],
# user_properties=UserProperties(
# custom_properties={"age": "30"},
# location=UserLocation(location_id="ny", description="New York", default_relationship=Relationship(type="located_in"))
# ),
# default_fields={
# "created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
# "updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# }
# )
# # Run the graph creation asynchronously
# G = asyncio.run(create_semantic_graph(graph_model_instance, graph_client))
# # Optionally, here you can add more nodes, edges, or perform other operations on the graph G
# async def create_semantic_graph(
# ):
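
How generate_node_id keys the nodes above can be seen on stand-in models (these are not the project's data_models):

import asyncio
from pydantic import BaseModel

class Document(BaseModel):
    doc_id: str = "doc1"

class Category(BaseModel):
    category_id: str = "finance"  # not among the probed fields

print(asyncio.run(generate_node_id(Document())))  # "Document:doc1"
print(asyncio.run(generate_node_id(Category())))  # "Category:default" — fallback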

View file

@@ -0,0 +1,22 @@
from datetime import datetime
from cognitive_architecture.shared.data_models import DefaultGraphModel, Relationship, UserProperties, UserLocation
from cognitive_architecture.modules.cognify.graph.create import create_semantic_graph
async def initialize_graph(root_id: str):
graph = DefaultGraphModel(
id = root_id,
user_properties = UserProperties(
custom_properties = {"age": "30"},
location = UserLocation(
location_id = "ny",
description = "New York",
default_relationship = Relationship(type = "located_in")
)
),
default_fields = {
"created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}
)
await create_semantic_graph(graph)
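
A hedged usage sketch; "default_user" mirrors the module-level USER_ID that cognify.py passes in before processing documents:

import asyncio

asyncio.run(initialize_graph("default_user"))
# Creates the root node "DefaultGraphModel:default_user", with the user's
# location and custom properties attached beneath it.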

View file

@@ -1,5 +1,5 @@
""" This module contains the code to classify content into categories using the LLM API. """
from typing import Type
from typing import Type, List
from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.utils import read_query_prompt
@@ -13,24 +13,23 @@ async def classify_into_categories(text_input: str, system_prompt_path: str, res
return extract_categories(llm_output.dict())
def extract_categories(llm_output):
def extract_categories(llm_output) -> List[dict]:
# Extract the first subclass from the list (assuming there could be more)
subclass_enum = llm_output["label"]["subclass"][0]
layer_enum = llm_output["label"]["subclass"][0]
# The data type is derived from "type" and converted to lowercase
data_type = llm_output["label"]["type"].lower()
# The context name is the name of the Enum member (e.g., "NEWS_STORIES")
# context_name = subclass_enum.name.replace("_", " ").title()
# The layer name is the name of the Enum member (e.g., "NEWS_STORIES")
# layer_name = layer_enum.name.replace("_", " ").title()
# The layer name is the value of the Enum member (e.g., "News stories and blog posts")
layer_name = subclass_enum.value
layer_name = layer_enum.value
return {
return [{
"data_type": data_type,
"context_name": data_type.upper(), # llm context classification
"layer_name": layer_name # llm layer classification
}
}]
# if __name__ == "__main__":
# import asyncio
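
A worked example of the new single-element return shape (the Enum is a stand-in), matching the classified_categories[0] indexing in cognify.py:

from enum import Enum

class TextSubclass(Enum):  # stand-in for the real subclass Enum
    ARTICLES = "Articles, essays, and reports"

llm_output = {"label": {"type": "TEXT", "subclass": [TextSubclass.ARTICLES]}}
extract_categories(llm_output)
# -> [{"data_type": "text",
#      "context_name": "TEXT",
#      "layer_name": "Articles, essays, and reports"}]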

View file

@@ -10,15 +10,3 @@ async def content_to_cog_layers(filename: str, context, response_model: Type[Bas
formatted_text_input = await async_render_template(filename, context)
return await llm_client.acreate_structured_output(formatted_text_input, formatted_text_input, response_model)
# if __name__ == "__main__":
# import asyncio
# asyncio.run(content_to_cog_layers("generate_cog_layers.txt", {
# 'data_type': 'text',
# 'context_name': 'Scientific Research',
# 'layer_name': 'Content Layer'
# }, response_model=CognitiveLayer))