Merge remote-tracking branch 'origin/feat/COG-24-add-qdrant' into feat/COG-24-add-qdrant
commit 801069b4c0
13 changed files with 221 additions and 488 deletions
cognee.ipynb (233 changes)
File diff suppressed because one or more lines are too long
@@ -27,7 +27,7 @@ async def add_standalone(
     promises = []

     for data_item in data:
-        promises.append(add(data_item, dataset_id, dataset_name))
+        promises.append(add_standalone(data_item, dataset_id, dataset_name))

     results = await asyncio.gather(*promises)

@@ -36,7 +36,7 @@ async def add_standalone(

     if is_data_path(data):
         with open(data.replace("file://", ""), "rb") as file:
-            return await add(file, dataset_id, dataset_name)
+            return await add_standalone(file, dataset_id, dataset_name)

     classified_data = ingestion.classify(data)
@@ -21,39 +21,45 @@ from cognitive_architecture.modules.cognify.llm.classify_content import classify
 from cognitive_architecture.modules.cognify.llm.content_to_cog_layers import content_to_cog_layers
 from cognitive_architecture.modules.cognify.llm.generate_graph import generate_graph
 from cognitive_architecture.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer
 from cognitive_architecture.modules.cognify.graph.create import create_semantic_graph
 from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
 from cognitive_architecture.shared.data_models import GraphDBType
 from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
 from cognitive_architecture.infrastructure.databases.relational import DuckDBAdapter
+from cognitive_architecture.modules.cognify.graph.add_document_node import add_document_node
+from cognitive_architecture.modules.cognify.graph.initialize_graph import initialize_graph

 config = Config()
 config.load()

 aclient = instructor.patch(OpenAI())

+USER_ID = "default_user"
+
 async def cognify(dataset_name: str):
     """This function is responsible for the cognitive processing of the content."""

     db = DuckDBAdapter()
     files_metadata = db.get_files_metadata(dataset_name)
-    files = list(files_metadata["file_path"].values())

     awaitables = []

-    for file in files:
-        with open(file, "rb") as file:
+    await initialize_graph(USER_ID)
+
+    for file_metadata in files_metadata:
+        with open(file_metadata["file_path"], "rb") as file:
             elements = partition_pdf(file = file, strategy = "fast")
             text = "\n".join(map(lambda element: clean(element.text), elements))

-            awaitables.append(process_text(text))
+            awaitables.append(process_text(text, file_metadata))

     graphs = await asyncio.gather(*awaitables)

     return graphs[0]

-async def process_text(input_text: str):
-    classified_categories = None
+async def process_text(input_text: str, file_metadata: dict):
+    print(f"Processing document ({file_metadata['id']})")
+
+    classified_categories = []

     try:
         # Classify the content into categories
@@ -62,13 +68,17 @@ async def process_text(input_text: str):
             "classify_content.txt",
             DefaultContentPrediction
         )
+
+        file_metadata["categories"] = list(map(lambda category: category["layer_name"], classified_categories))
     except Exception as e:
         print(e)
+        raise e

+    await add_document_node(f"DefaultGraphModel:{USER_ID}", file_metadata)
+    print(f"Document ({file_metadata['id']}) categorized: {file_metadata['categories']}")

     cognitive_layers = await content_to_cog_layers(
         "generate_cog_layers.txt",
-        classified_categories,
+        classified_categories[0],
         response_model = DefaultCognitiveLayer
     )
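For orientation: file_metadata is threaded from cognify into process_text, and is one record of DuckDBAdapter.get_files_metadata (see the relational adapter hunk below). The keys touched in this diff suggest roughly the following shape; the values here are invented placeholders and the field list is not the full schema:

    file_metadata = {
        "id": "doc1",                        # used in node IDs and log lines
        "name": "Document 1",                # becomes the document title
        "file_path": "/data/main/doc1.pdf",  # opened and fed to partition_pdf
        # attached by the classification step above:
        "categories": ["Articles, essays, and reports"],
    }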
@@ -84,73 +94,17 @@ async def process_text(input_text: str):
     layer_graphs = await generate_graph_per_layer(input_text, cognitive_layers)
-    # print(layer_graphs)
-
-    # ADD SUMMARY
-    # ADD CATEGORIES
+    print(f"Document ({file_metadata['id']}) layer graphs created")

-    # Define a GraphModel instance with example data
-    # graph_model_instance = DefaultGraphModel(
-    #     id="user123",
-    #     documents=[
-    #         Document(
-    #             doc_id = "doc1",
-    #             title = "Document 1",
-    #             summary = "Summary of Document 1",
-    #             content_id = "content_id_for_doc1",
-    #             doc_type = DocumentType(type_id = "PDF", description = "Portable Document Format"),
-    #             categories = [
-    #                 Category(
-    #                     category_id = "finance",
-    #                     name = "Finance",
-    #                     default_relationship = Relationship(type = "belongs_to")
-    #                 ),
-    #                 Category(
-    #                     category_id = "tech",
-    #                     name = "Technology",
-    #                     default_relationship = Relationship(type = "belongs_to")
-    #                 )
-    #             ],
-    #             default_relationship = Relationship(type="has_document")
-    #         ),
-    #         Document(
-    #             doc_id = "doc2",
-    #             title = "Document 2",
-    #             summary = "Summary of Document 2",
-    #             content_id = "content_id_for_doc2",
-    #             doc_type = DocumentType(type_id = "TXT", description = "Text File"),
-    #             categories = [
-    #                 Category(
-    #                     category_id = "health",
-    #                     name = "Health",
-    #                     default_relationship = Relationship(type="belongs_to")
-    #                 ),
-    #                 Category(
-    #                     category_id = "wellness",
-    #                     name = "Wellness",
-    #                     default_relationship = Relationship(type="belongs_to")
-    #                 )
-    #             ],
-    #             default_relationship = Relationship(type = "has_document")
-    #         )
-    #     ],
-    #     user_properties = UserProperties(
-    #         custom_properties = {"age": "30"},
-    #         location = UserLocation(
-    #             location_id = "ny",
-    #             description = "New York",
-    #             default_relationship = Relationship(type = "located_in"))
-    #     ),
-    #     default_fields={
-    #         "created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-    #         "updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    #     }
-    # )
-    # G = await create_semantic_graph(graph_model_instance)
+
+    await add_classification_nodes(f"DOCUMENT:{file_metadata['id']}", classified_categories[0])
+
+    unique_layer_uuids = await append_to_graph(layer_graphs, classified_categories[0])
+
+    print(f"Document ({file_metadata['id']}) layers connected")

-    graph_client = get_graph_client(GraphDBType.NETWORKX)
-    # G = await create_semantic_graph(graph_model_instance, graph_client)
-
-    await add_classification_nodes("Document:doc1", classified_categories)
-
-    unique_layer_uuids = await append_to_graph(layer_graphs, classified_categories, graph_client)
-
-    await graph_client.load_graph_from_file()
@@ -169,7 +123,6 @@ async def process_text(input_text: str):
                 size = 3072
             )
         },
-        # Set other configs as needed
     )

     try:
@@ -179,25 +132,14 @@ async def process_text(input_text: str):
     except Exception as e:
         print(e)

-    # from qdrant_client import QdrantClient
-    # qdrant = QdrantClient(
-    #     url=os.getenv("QDRANT_URL"),
-    #     api_key=os.getenv("QDRANT_API_KEY"))
-    #
-    # collections_response = qdrant.http.collections_api.get_collections()
-    # collections = collections_response.result.collections
-    # print(collections)
-
     await add_propositions(node_descriptions)

     grouped_data = await add_node_connection(node_descriptions)

-    # print("we are here, grouped_data", grouped_data)
-
     llm_client = get_llm_client()

     relationship_dict = await process_items(grouped_data, unique_layer_uuids, llm_client)

-    # print("we are here", relationship_dict[0])
-
     results = await adapted_qdrant_batch_search(relationship_dict, db)
@@ -208,23 +150,9 @@ async def process_text(input_text: str):

     connect_nodes_in_graph(graph, relationship_d)

-    return graph
+    print(f"Document ({file_metadata['id']}) processed")

-    #
-    # grouped_data = {}
-    #
-    # # Iterate through each dictionary in the list
-    # for item in node_descriptions:
-    #     # Get the layer_decomposition_uuid of the current dictionary
-    #     uuid = item["layer_decomposition_uuid"]
-    #
-    #     # Check if this uuid is already a key in the grouped_data dictionary
-    #     if uuid not in grouped_data:
-    #         # If not, initialize a new list for this uuid
-    #         grouped_data[uuid] = []
-    #
-    #     # Append the current dictionary to the list corresponding to its uuid
-    #     grouped_data[uuid].append(item)
+    return graph
@@ -91,7 +91,7 @@ class NetworXAdapter(GraphDBInterface):
     # async def create(self, user_id, custom_user_properties=None, required_layers=None, default_fields=None, existing_graph=None):
     #     """Asynchronously create or update a user content graph based on given parameters."""
     #     # Assume required_layers is a dictionary-like object; use more robust validation in production
-    #     category_name = required_layers['context_name']
+    #     category_name = required_layers['data_type']
     #     subgroup_names = [required_layers['layer_name']]
     #
     #     # Construct the additional_categories structure
@@ -9,14 +9,14 @@ class DuckDBAdapter():
         self.db_client = duckdb.connect(db_location)

     def get_datasets(self):
-        tables = self.db_client.sql("SELECT DISTINCT schema_name FROM duckdb_tables();").to_df().to_dict()
+        tables = self.db_client.sql("SELECT DISTINCT schema_name FROM duckdb_tables();").to_df().to_dict("list")

         return list(
             filter(
                 lambda table_name: table_name.endswith('staging') is False,
-                tables["schema_name"].values()
+                tables["schema_name"]
             )
         )

     def get_files_metadata(self, dataset_name: str):
-        return self.db_client.sql(f"SELECT * FROM {dataset_name}.file_metadata;").to_df().to_dict()
+        return self.db_client.sql(f"SELECT * FROM {dataset_name}.file_metadata;").to_df().to_dict("records")
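The behavioral change in both methods is the pandas to_dict orient. With the default orient, each column maps to an {index: value} dict, which is why get_datasets previously needed .values(); "list" yields a plain list per column, and "records" yields one dict per row, which is exactly what cognify now iterates over with for file_metadata in files_metadata. A quick illustration on a made-up frame:

    import pandas as pd

    df = pd.DataFrame({"schema_name": ["main", "main_staging"]})

    df.to_dict()           # {'schema_name': {0: 'main', 1: 'main_staging'}}
    df.to_dict("list")     # {'schema_name': ['main', 'main_staging']}
    df.to_dict("records")  # [{'schema_name': 'main'}, {'schema_name': 'main_staging'}]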
@@ -2,25 +2,25 @@
 from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType


-async def add_classification_nodes(graph_id, classification_data):
+async def add_classification_nodes(document_id, classification_data):
     graph_client = get_graph_client(GraphDBType.NETWORKX)

     await graph_client.load_graph_from_file()

-    context = classification_data["context_name"]
-    layer = classification_data["layer_name"]
+    data_type = classification_data["data_type"]
+    layer_name = classification_data["layer_name"]

-    # Create the layer classification node ID using the context_name
-    layer_classification_node_id = f"LLM_LAYER_CLASSIFICATION:{context}:{graph_id}"
+    # Create the layer classification node ID
+    layer_classification_node_id = f"LLM_LAYER_CLASSIFICATION:{data_type}:{document_id}"

     # Add the node to the graph, unpacking the node data from the dictionary
     await graph_client.add_node(layer_classification_node_id, **classification_data)

     # Link this node to the corresponding document node
-    await graph_client.add_edge(graph_id, layer_classification_node_id, relationship = "classified_as")
+    await graph_client.add_edge(document_id, layer_classification_node_id, relationship = "classified_as")

-    # Create the detailed classification node ID using the context_name
-    detailed_classification_node_id = f"LLM_CLASSIFICATION:LAYER:{layer}:{graph_id}"
+    # Create the detailed classification node ID
+    detailed_classification_node_id = f"LLM_CLASSIFICATION:LAYER:{layer_name}:{document_id}"

     # Add the detailed classification node, reusing the same node data
     await graph_client.add_node(detailed_classification_node_id, **classification_data)
@@ -29,22 +29,3 @@ async def add_classification_nodes(graph_id, classification_data):
     await graph_client.add_edge(layer_classification_node_id, detailed_classification_node_id, relationship = "contains_analysis")

     return True
-
-
-
-# if __name__ == "__main__":
-#     import asyncio
-
-#     # Assuming all necessary imports and GraphDBType, get_graph_client, Document, DocumentType, etc. are defined
-
-#     # Initialize the graph client
-#     graph_client = get_graph_client(GraphDBType.NETWORKX)
-
-
-#     G = asyncio.run(add_classification_nodes(graph_client, "Document:doc1", {"data_type": "text",
-#                                                                              "context_name": "TEXT",
-#                                                                              "layer_name": "Articles, essays, and reports"}))
-
-#     from cognitive_architecture.utils import render_graph
-#     ff = asyncio.run( render_graph(G.graph, graph_type='networkx'))
-#     print(ff)
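After the rename, the first argument is the node ID of an already-ingested document rather than a generic graph ID. A hypothetical call mirroring the new call site in cognify (the classification dict has the shape produced by extract_categories further down):

    await add_classification_nodes(
        "DOCUMENT:doc1",  # f"DOCUMENT:{file_metadata['id']}" in process_text
        {
            "data_type": "text",
            "context_name": "TEXT",
            "layer_name": "Articles, essays, and reports",
        },
    )
    # Creates and links:
    #   LLM_LAYER_CLASSIFICATION:text:DOCUMENT:doc1
    #     (edge "classified_as" from the document node)
    #   LLM_CLASSIFICATION:LAYER:Articles, essays, and reports:DOCUMENT:doc1
    #     (edge "contains_analysis" from the layer classification node)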
@@ -0,0 +1,28 @@
+from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
+from cognitive_architecture.shared.data_models import GraphDBType, Document, DocumentType, Category, Relationship
+from .create import add_node_and_edge
+
+def create_category(category_name: str):
+    return Category(
+        category_id = category_name.lower(),
+        name = category_name,
+        default_relationship = Relationship(type = "belongs_to")
+    )
+
+async def add_document_node(parent_id, document_data):
+    graph_client = get_graph_client(GraphDBType.NETWORKX)
+    await graph_client.load_graph_from_file()
+
+    document_id = f"DOCUMENT:{document_data['id']}"
+
+    document = Document(
+        doc_id = document_id,
+        title = document_data["name"],
+        doc_type = DocumentType(type_id = "PDF", description = "Portable Document Format"),
+        categories = list(map(create_category, document_data["categories"])) if "categories" in document_data else [],
+    )
+
+    document_dict = document.model_dump()
+    relationship = Relationship(type = "has_document").model_dump()
+
+    await add_node_and_edge(graph_client, parent_id, document_id, document_dict, relationship)
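A hedged usage sketch for this new module: create_category normalizes a plain category name into the Category model (create_category("Finance") yields category_id "finance" with a belongs_to relationship), and add_document_node hangs the document off the caller's root node. The metadata values below are placeholders:

    # parent_id matches the call in process_text: f"DefaultGraphModel:{USER_ID}"
    await add_document_node(
        "DefaultGraphModel:default_user",
        {"id": "doc1", "name": "Document 1", "categories": ["Finance", "Technology"]},
    )
    # Adds node DOCUMENT:doc1 and a "has_document" edge from the parent node.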
@@ -1,6 +1,6 @@
-from networkx import Graph
 from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
 from cognitive_architecture.shared.data_models import GraphDBType
+from networkx import Graph


 async def extract_node_descriptions(data):
@@ -25,7 +25,6 @@ async def add_node_connection(node_descriptions):

     return grouped_data

-
 def connect_nodes_in_graph(graph: Graph, relationship_dict: dict) -> Graph:
     """
     For each relationship in relationship_dict, check if both nodes exist in the graph based on node attributes.
@@ -37,7 +36,6 @@ def connect_nodes_in_graph(graph: Graph, relationship_dict: dict) -> Graph:
     for id, relationships in relationship_dict.items():
         for relationship in relationships:
             searched_node_attr_id = relationship['searched_node_id']
-            print(searched_node_attr_id)
             score_attr_id = relationship['original_id_for_search']
             score = relationship['score']
@@ -58,8 +56,6 @@ def connect_nodes_in_graph(graph: Graph, relationship_dict: dict) -> Graph:

         # Check if both nodes were found in the graph
         if searched_node_key is not None and score_node_key is not None:
-            print(searched_node_key)
-            print(score_node_key)
             # If both nodes exist, create an edge between them
             # You can customize the edge attributes as needed, here we use 'score' as an attribute
             graph.add_edge(searched_node_key, score_node_key, weight=score,
@@ -2,24 +2,29 @@
 import uuid
 import json
 from datetime import datetime
+from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType


-async def add_propositions(graph_client, category_name, subclass_content, layer_description, new_data, layer_uuid,
-                           layer_decomposition_uuid):
+async def add_propositions(
+    data_type,
+    layer_name,
+    layer_description,
+    new_data,
+    layer_uuid,
+    layer_decomposition_uuid
+):
     """ Add nodes and edges to the graph for the given LLM knowledge graph and the layer"""
+    graph_client = get_graph_client(GraphDBType.NETWORKX)

-    # Find the node ID for the subclass within the category
+    await graph_client.load_graph_from_file()

-    subclass_node_id = None
-    for node, data in graph_client.graph.nodes(data=True):
-        if subclass_content in node:
-            subclass_node_id = node
+    layer_node_id = None
+    for node_id, data in graph_client.graph.nodes(data = True):
+        if layer_name in node_id:
+            layer_node_id = node_id

-    print(subclass_node_id)
-
-    if not subclass_node_id:
-        print(f"Subclass '{subclass_content}' under category '{category_name}' not found in the graph.")
+    if not layer_node_id:
+        print(f"Subclass '{layer_name}' under category '{data_type}' not found in the graph.")
         return graph_client

     # Mapping from old node IDs to new node IDs
@@ -28,19 +33,24 @@ async def add_propositions(graph_client, category_name, subclass_content, layer_
     # Add nodes from the Pydantic object
     for node in new_data.nodes:
         unique_node_id = uuid.uuid4()

         new_node_id = f"{node.description} - {str(layer_uuid)} - {str(layer_decomposition_uuid)} - {str(unique_node_id)}"
-        await graph_client.add_node(new_node_id,
-                                    created_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                                    updated_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                                    description=node.description,
-                                    category=node.category,
-                                    memory_type=node.memory_type,
-                                    layer_uuid=str(layer_uuid),
-                                    layer_description=str(layer_description),
-                                    layer_decomposition_uuid=str(layer_decomposition_uuid),
-                                    unique_id=str(unique_node_id),
-                                    type='detail')
-        await graph_client.add_edge(subclass_node_id, new_node_id, relationship='detail')
+
+        await graph_client.add_node(
+            new_node_id,
+            created_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            updated_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            description=node.description,
+            category=node.category,
+            memory_type=node.memory_type,
+            layer_uuid=str(layer_uuid),
+            layer_description=str(layer_description),
+            layer_decomposition_uuid=str(layer_decomposition_uuid),
+            unique_id=str(unique_node_id),
+            type='detail'
+        )
+
+        await graph_client.add_edge(layer_node_id, new_node_id, relationship='detail')

         # Store the mapping from old node ID to new node ID
         node_id_mapping[node.id] = new_node_id
@@ -56,18 +66,16 @@ async def add_propositions(graph_client, category_name, subclass_content, layer_
         else:
             print(f"Could not find mapping for edge from {edge.source} to {edge.target}")

     return graph_client

-async def append_to_graph(layer_graphs, required_layers, graph_client):
+async def append_to_graph(layer_graphs, required_layers):
     # Generate a UUID for the overall layer
     layer_uuid = uuid.uuid4()
     decomposition_uuids = set()

-    # Extract category name from required_layers data
-    category_name = required_layers["data_type"]
+    data_type = required_layers["data_type"]

-    # Extract subgroup name from required_layers data
-    # Assuming there's always at least one subclass and we're taking the first
-    subgroup_name = required_layers["layer_name"]
+    # Assuming there's always at least one layer and we're taking the first
+    layer_name = required_layers["layer_name"]

     for layer_ind in layer_graphs:
@@ -77,14 +85,19 @@ async def append_to_graph(layer_graphs, required_layers, graph_client):

         # Generate a UUID for this particular layer decomposition
         layer_decomposition_uuid = uuid.uuid4()

         decomposition_uuids.add(layer_decomposition_uuid)

-        # Assuming append_data_to_graph is defined elsewhere and appends data to graph_client
-        # You would pass relevant information from knowledge_graph along with other details to this function
-        await add_propositions(graph_client, category_name, subgroup_name, layer_description, knowledge_graph,
-                               layer_uuid, layer_decomposition_uuid)
-
-        # Print updated graph for verification (assuming F is the updated NetworkX Graph)
-        print("Updated Nodes:", graph_client.graph.nodes(data=True))
+        await add_propositions(
+            data_type,
+            layer_name,
+            layer_description,
+            knowledge_graph,
+            layer_uuid,
+            layer_decomposition_uuid
+        )

     return decomposition_uuids
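Two details worth noting in the rewritten add_propositions. First, the layer node is found by substring match (if layer_name in node_id), which works because the detailed classification node IDs created earlier embed the layer name verbatim (LLM_CLASSIFICATION:LAYER:{layer_name}:{document_id}). Second, each proposition node ID concatenates the description with three UUIDs, so IDs are unique per run; illustratively (UUIDs abbreviated, values invented):

    # f"{node.description} - {layer_uuid} - {layer_decomposition_uuid} - {unique_node_id}"
    "Paris is the capital of France - 1f0e... - 9c2a... - 4b7d..."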
@@ -1,13 +1,12 @@
 """ This module is responsible for creating a semantic graph """
-from datetime import datetime
 from typing import Optional, Any
 from pydantic import BaseModel
 from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
-from cognitive_architecture.shared.data_models import GraphDBType, DefaultGraphModel, Document, DocumentType, Category, Relationship, UserProperties, UserLocation
+from cognitive_architecture.shared.data_models import GraphDBType


 async def generate_node_id(instance: BaseModel) -> str:
-    for field in ['id', 'doc_id', 'location_id', 'type_id']:
+    for field in ["id", "doc_id", "location_id", "type_id"]:
         if hasattr(instance, field):
             return f"{instance.__class__.__name__}:{getattr(instance, field)}"
     return f"{instance.__class__.__name__}:default"
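generate_node_id gives every Pydantic model a stable graph identity from its first ID-like field, which is what makes node IDs such as Document:doc1 and UserLocation:ny elsewhere in this diff line up. Sketch, with the model constructors abbreviated:

    await generate_node_id(document)  # Document(doc_id="doc1", ...)        -> "Document:doc1"
    await generate_node_id(location)  # UserLocation(location_id="ny", ...) -> "UserLocation:ny"
    await generate_node_id(other)     # no matching field                   -> "<ClassName>:default"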
@@ -19,100 +18,100 @@ async def add_node_and_edge(client, parent_id: Optional[str], node_id: str, node
     await client.add_edge(parent_id, node_id, **relationship_data)


-async def process_attribute(G, parent_id: Optional[str], attribute: str, value: Any):
+async def process_attribute(graph_client, parent_id: Optional[str], attribute: str, value: Any):
     if isinstance(value, BaseModel):
         node_id = await generate_node_id(value)
-        node_data = value.dict(exclude={'default_relationship'})
+        node_data = value.dict(exclude={"default_relationship"})

         # Use the specified default relationship for the edge between the parent node and the current node
-        relationship_data = value.default_relationship.dict() if hasattr(value, 'default_relationship') else {}
-        await add_node_and_edge(G, parent_id, node_id, node_data, relationship_data)
+        relationship_data = value.default_relationship.dict() if hasattr(value, "default_relationship") else {}
+
+        await add_node_and_edge(graph_client, parent_id, node_id, node_data, relationship_data)

         # Recursively process nested attributes to ensure all nodes and relationships are added to the graph
         for sub_attr, sub_val in value.__dict__.items():  # Access attributes and their values directly
-            await process_attribute(G, node_id, sub_attr, sub_val)
+            await process_attribute(graph_client, node_id, sub_attr, sub_val)

     elif isinstance(value, list) and all(isinstance(item, BaseModel) for item in value):
         # For lists of BaseModel instances, process each item in the list
         for item in value:
-            await process_attribute(G, parent_id, attribute, item)
-
-async def create_dynamic(graph_model, client) :
-    await client.load_graph_from_file()
+            await process_attribute(graph_client, parent_id, attribute, item)

+async def create_dynamic(graph_model) :
     root_id = await generate_node_id(graph_model)

     node_data = graph_model.dict(exclude = {"default_relationship", "id"})
-    print(node_data)

-    await client.add_node(root_id, **node_data)
+    graph_client = get_graph_client(GraphDBType.NETWORKX)
+
+    await graph_client.add_node(root_id, **node_data)

     for attribute_name, attribute_value in graph_model:
-        await process_attribute(client, root_id, attribute_name, attribute_value)
+        await process_attribute(graph_client, root_id, attribute_name, attribute_value)

-    return client
+    return graph_client


-async def create_semantic_graph(graph_model_instance, graph_client):
-    await graph_client.load_graph_from_file()
-
+async def create_semantic_graph(graph_model_instance):
     # Dynamic graph creation based on the provided graph model instance
-    graph = await create_dynamic(graph_model_instance, graph_client)
-
-    # Example of adding a node and saving the graph can be demonstrated in the __main__ section or in tests
+    graph = await create_dynamic(graph_model_instance)

     return graph

-if __name__ == "__main__":
-    import asyncio
-
-    # Assuming all necessary imports and GraphDBType, get_graph_client, Document, DocumentType, etc. are defined
-
-    # Initialize the graph client
-    graph_client = get_graph_client(GraphDBType.NETWORKX)
-
-    # Define a GraphModel instance with example data
-    graph_model_instance = DefaultGraphModel(
-        id="user123",
-        documents=[
-            Document(
-                doc_id="doc1",
-                title="Document 1",
-                summary="Summary of Document 1",
-                content_id="content_id_for_doc1",
-                doc_type=DocumentType(type_id="PDF", description="Portable Document Format"),
-                categories=[
-                    Category(category_id="finance", name="Finance", default_relationship=Relationship(type="belongs_to")),
-                    Category(category_id="tech", name="Technology", default_relationship=Relationship(type="belongs_to"))
-                ],
-                default_relationship=Relationship(type="has_document")
-            ),
-            Document(
-                doc_id="doc2",
-                title="Document 2",
-                summary="Summary of Document 2",
-                content_id="content_id_for_doc2",
-                doc_type=DocumentType(type_id="TXT", description="Text File"),
-                categories=[
-                    Category(category_id="health", name="Health", default_relationship=Relationship(type="belongs_to")),
-                    Category(category_id="wellness", name="Wellness", default_relationship=Relationship(type="belongs_to"))
-                ],
-                default_relationship=Relationship(type="has_document")
-            )
-        ],
-        user_properties=UserProperties(
-            custom_properties={"age": "30"},
-            location=UserLocation(location_id="ny", description="New York", default_relationship=Relationship(type="located_in"))
-        ),
-        default_fields={
-            "created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-            "updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        }
-    )
-
-    # Run the graph creation asynchronously
-    G = asyncio.run(create_semantic_graph(graph_model_instance, graph_client))
-
-    # Optionally, here you can add more nodes, edges, or perform other operations on the graph G
+# if __name__ == "__main__":
+#     import asyncio
+
+# # Assuming all necessary imports and GraphDBType, get_graph_client, Document, DocumentType, etc. are defined
+
+# # Initialize the graph client
+# graph_client = get_graph_client(GraphDBType.NETWORKX)
+
+# # Define a GraphModel instance with example data
+# graph_model_instance = DefaultGraphModel(
+#     id="user123",
+#     documents=[
+#         Document(
+#             doc_id="doc1",
+#             title="Document 1",
+#             summary="Summary of Document 1",
+#             content_id="content_id_for_doc1",
+#             doc_type=DocumentType(type_id="PDF", description="Portable Document Format"),
+#             categories=[
+#                 Category(category_id="finance", name="Finance", default_relationship=Relationship(type="belongs_to")),
+#                 Category(category_id="tech", name="Technology", default_relationship=Relationship(type="belongs_to"))
+#             ],
+#             default_relationship=Relationship(type="has_document")
+#         ),
+#         Document(
+#             doc_id="doc2",
+#             title="Document 2",
+#             summary="Summary of Document 2",
+#             content_id="content_id_for_doc2",
+#             doc_type=DocumentType(type_id="TXT", description="Text File"),
+#             categories=[
+#                 Category(category_id="health", name="Health", default_relationship=Relationship(type="belongs_to")),
+#                 Category(category_id="wellness", name="Wellness", default_relationship=Relationship(type="belongs_to"))
+#             ],
+#             default_relationship=Relationship(type="has_document")
+#         )
+#     ],
+#     user_properties=UserProperties(
+#         custom_properties={"age": "30"},
+#         location=UserLocation(location_id="ny", description="New York", default_relationship=Relationship(type="located_in"))
+#     ),
+#     default_fields={
+#         "created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+#         "updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+#     }
+# )
+
+# # Run the graph creation asynchronously
+# G = asyncio.run(create_semantic_graph(graph_model_instance, graph_client))
+
+# # Optionally, here you can add more nodes, edges, or perform other operations on the graph G
+
+# async def create_semantic_graph(
+# ):
@@ -0,0 +1,22 @@
+from datetime import datetime
+from cognitive_architecture.shared.data_models import DefaultGraphModel, Relationship, UserProperties, UserLocation
+from cognitive_architecture.modules.cognify.graph.create import create_semantic_graph
+
+async def initialize_graph(root_id: str):
+    graph = DefaultGraphModel(
+        id = root_id,
+        user_properties = UserProperties(
+            custom_properties = {"age": "30"},
+            location = UserLocation(
+                location_id = "ny",
+                description = "New York",
+                default_relationship = Relationship(type = "located_in")
+            )
+        ),
+        default_fields = {
+            "created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        }
+    )
+
+    await create_semantic_graph(graph)
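A hedged usage sketch: cognify now calls this once per run before any document is processed, and generate_node_id in create.py turns the model's id field into the root node ID, so the parent ID used by add_document_node lines up. Note that the age and location values are hardcoded placeholders in this revision:

    import asyncio
    from cognitive_architecture.modules.cognify.graph.initialize_graph import initialize_graph

    # Creates root node "DefaultGraphModel:default_user" plus the
    # UserProperties/UserLocation satellite nodes declared above.
    asyncio.run(initialize_graph("default_user"))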
@@ -1,5 +1,5 @@
 """ This module contains the code to classify content into categories using the LLM API. """
-from typing import Type
+from typing import Type, List
 from pydantic import BaseModel
 from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
 from cognitive_architecture.utils import read_query_prompt
@@ -13,24 +13,23 @@ async def classify_into_categories(text_input: str, system_prompt_path: str, res
     return extract_categories(llm_output.dict())

-def extract_categories(llm_output):
+def extract_categories(llm_output) -> List[dict]:
     # Extract the first subclass from the list (assuming there could be more)
-    subclass_enum = llm_output["label"]["subclass"][0]
+    layer_enum = llm_output["label"]["subclass"][0]

     # The data type is derived from "type" and converted to lowercase
     data_type = llm_output["label"]["type"].lower()

-    # The context name is the name of the Enum member (e.g., "NEWS_STORIES")
-    # context_name = subclass_enum.name.replace("_", " ").title()
+    # The layer name is the name of the Enum member (e.g., "NEWS_STORIES")
+    # layer_name = layer_enum.name.replace("_", " ").title()

     # The layer name is the value of the Enum member (e.g., "News stories and blog posts")
-    layer_name = subclass_enum.value
+    layer_name = layer_enum.value

-    return {
+    return [{
         "data_type": data_type,
         "context_name": data_type.upper(), # llm context classification
         "layer_name": layer_name # llm layer classification
-    }
+    }]

 # if __name__ == "__main__":
 #     import asyncio
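The switch from a single dict to a one-element list is what allows callers to index classified_categories[0] (as cognify.py now does) while leaving room for multi-label output later. Illustrative result, reusing the enum example from the comments:

    extract_categories(llm_output)
    # -> [{
    #        "data_type": "text",
    #        "context_name": "TEXT",
    #        "layer_name": "News stories and blog posts",
    #    }]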
@@ -10,15 +10,3 @@ async def content_to_cog_layers(filename: str, context, response_model: Type[Bas
     formatted_text_input = await async_render_template(filename, context)

     return await llm_client.acreate_structured_output(formatted_text_input, formatted_text_input, response_model)
-
-
-# if __name__ == "__main__":
-#     import asyncio
-
-#     asyncio.run(content_to_cog_layers("generate_cog_layers.txt", {
-#         'data_type': 'text',
-#         'context_name': 'Scientific Research',
-#         'layer_name': 'Content Layer'
-#     }, response_model=CognitiveLayer))