Prepare for the presentation and add supporting info

This commit is contained in:
Vasilije 2024-03-09 17:40:51 +01:00
parent 66b40dfcd0
commit 9ad22edb15
10 changed files with 1614 additions and 1240 deletions

File diff suppressed because one or more lines are too long

View file

@ -43,7 +43,7 @@ class Config:
graph_filename = os.getenv("GRAPH_NAME", "cognee_graph.pkl")
# Model parameters
model: str = "gpt-4-1106-preview"
model: str = "gpt-4-0125-preview"
model_endpoint: str = "openai"
openai_key: Optional[str] = os.getenv("OPENAI_API_KEY")
openai_temperature: float = float(os.getenv("OPENAI_TEMPERATURE", 0.0))

View file

@ -3,23 +3,20 @@ from typing import Type
from cognitive_architecture.config import Config
from .graph_db_interface import GraphDBInterface
from .networkx.adapter import NetworXAdapter
# Assuming Neo4jAdapter is defined somewhere
# from .neo4j.adapter import Neo4jAdapter
from enum import Enum, auto
from cognitive_architecture.shared.data_models import GraphDBType
config = Config()
config.load()
def get_graph_client(graph_type: GraphDBType, graph_filename: str = None) -> GraphDBInterface:
    """Factory returning the graph client for the requested backend.

    Args:
        graph_type: Backend selector (``GraphDBType`` imported from
            ``cognitive_architecture.shared.data_models`` — do not redefine it
            locally, or enum identity comparisons against callers' members fail).
        graph_filename: Optional override for the graph file; falls back to
            ``config.graph_filename`` when omitted.

    Returns:
        A concrete ``GraphDBInterface`` implementation.

    Raises:
        NotImplementedError: for ``GraphDBType.NEO4J`` (adapter pending).
        ValueError: for an unrecognized graph type (instead of silently
            returning ``None``).
    """
    if graph_filename is None:
        graph_filename = config.graph_filename
    if graph_type == GraphDBType.NETWORKX:
        return NetworXAdapter(filename=graph_filename)
    if graph_type == GraphDBType.NEO4J:
        # from .neo4j.adapter import Neo4jAdapter  # pending implementation
        raise NotImplementedError("Neo4j adapter is not implemented yet.")
    raise ValueError(f"Unsupported graph type: {graph_type}")

View file

class GraphDBInterface(Protocol):
    """Structural interface every graph database adapter must implement.

    Covers graph persistence (save/load/delete of the backing file) and
    CRUD on nodes and edges. Adapters default ``file_path``/``path`` to
    their configured filename when ``None`` is passed.
    """

    """ Save and Load Graphs """

    @abstractmethod
    async def graph(self):
        """Return the underlying graph object."""
        raise NotImplementedError

    @abstractmethod
    async def save_graph_to_file(
        self,
        file_path: str = None
    ):
        """Persist the graph to storage."""
        raise NotImplementedError

    @abstractmethod
    async def load_graph_from_file(
        self,
        file_path: str = None
    ):
        """Load the graph from storage (adapters may initialize an empty one)."""
        raise NotImplementedError

    @abstractmethod
    async def delete_graph_from_file(
        self,
        path: str = None
    ):
        """Remove the persisted graph file."""
        raise NotImplementedError

    """ CRUD operations on graph nodes """

    @abstractmethod
    async def add_node(
        self,
        id: str,
        **kwargs
    ):
        """Add a node with arbitrary keyword properties."""
        raise NotImplementedError

    @abstractmethod
    async def delete_node(
        self,
        id: str
    ):
        """Delete a node by id."""
        raise NotImplementedError

    """ CRUD operations on graph edges """

    @abstractmethod
    async def add_edge(
        self,
        from_node: str,
        to_node: str,
        **kwargs
    ):
        """Add an edge between two nodes with optional properties."""
        raise NotImplementedError
View file

@ -1,91 +1,177 @@
"""Adapter for NetworkX graph database."""
import json
import os
import pickle
from datetime import datetime
from typing import Optional, Dict, Any
import aiofiles.os
import aiofiles
import networkx as nx
from cognitive_architecture.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
import logging
class NetworXAdapter(GraphDBInterface):
    """NetworkX-backed implementation of ``GraphDBInterface``.

    A process-wide singleton wrapping a ``nx.MultiDiGraph`` that is
    persisted to ``self.filename`` as node-link JSON after each mutation.

    NOTE(review): the previous ``async def graph(self)`` accessor was dead
    code — ``__init__`` assigns the instance attribute ``self.graph``,
    which shadows a method of the same name, so the coroutine could never
    be called on an instance. Callers access ``adapter.graph`` directly.
    NOTE(review): ``__new__`` enforces a singleton, but ``__init__`` still
    runs on every instantiation and resets ``self.graph`` — confirm this
    reset is intended before relying on singleton state.
    """

    _instance = None  # class-level slot for the singleton instance

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super(NetworXAdapter, cls).__new__(cls)
        return cls._instance

    def __init__(self, filename="cognee_graph.pkl"):
        self.filename = filename
        self.graph = nx.MultiDiGraph()

    async def add_node(self, id: str, **kwargs) -> None:
        """Add a node with the given properties if absent, then persist."""
        if not self.graph.has_node(id):
            self.graph.add_node(id, **kwargs)
            await self.save_graph_to_file(self.filename)

    async def add_edge(self, from_node: str, to_node: str, **kwargs) -> None:
        """Add an edge with optional properties, then persist."""
        self.graph.add_edge(from_node, to_node, **kwargs)
        await self.save_graph_to_file(self.filename)

    async def delete_node(self, id: str) -> None:
        """Delete a node if it exists, then persist."""
        if self.graph.has_node(id):
            self.graph.remove_node(id)
            await self.save_graph_to_file(self.filename)

    async def save_graph_to_file(self, file_path: str = None) -> None:
        """Save the graph to ``file_path`` (default ``self.filename``) as node-link JSON."""
        if not file_path:
            file_path = self.filename
        graph_data = nx.readwrite.json_graph.node_link_data(self.graph)
        async with aiofiles.open(file_path, 'w') as file:
            await file.write(json.dumps(graph_data))

    async def load_graph_from_file(self, file_path: str = None):
        """Load the graph from node-link JSON.

        Missing file or any load failure is logged and yields a fresh empty
        ``MultiDiGraph`` (consistent with ``__init__``) rather than raising.
        """
        if not file_path:
            file_path = self.filename
        try:
            if os.path.exists(file_path):
                async with aiofiles.open(file_path, 'r') as file:
                    graph_data = json.loads(await file.read())
                self.graph = nx.readwrite.json_graph.node_link_graph(graph_data)
                return self.graph
            logging.warning(f"File {file_path} not found. Initializing an empty graph.")
            self.graph = nx.MultiDiGraph()
            return self.graph
        except Exception as e:
            logging.error(f"Failed to load graph from {file_path}: {e}")
            # Fall back to an empty graph so callers always get a usable object.
            self.graph = nx.MultiDiGraph()
            return self.graph

    async def delete_graph_from_file(self, path: str = None):
        """Delete the persisted graph file from the filesystem (best-effort: errors are logged)."""
        if path is None:
            path = self.filename
        try:
            await aiofiles.os.remove(path)
            logging.info("Graph deleted successfully.")
        except Exception as e:
            logging.error(f"Failed to delete graph: {e}")

View file

@ -1,17 +1,146 @@
from cognitive_architecture.infrastructure.graph.get_graph_client import get_graph_client
""" This module is responsible for creating a semantic graph """
from datetime import datetime
from enum import Enum, auto
from typing import Type, Optional, Any
from pydantic import BaseModel
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognitive_architecture.shared.data_models import GraphDBType, DefaultGraphModel, Document, DocumentType, Category, Relationship, UserProperties, UserLocation
async def generate_node_id(instance: BaseModel) -> str:
    """Return a stable node id of the form ``ClassName:<id value>``.

    Scans a fixed priority list of id-like fields and uses the first one
    present on the instance; falls back to ``ClassName:default`` when none
    exist.
    """
    # Priority order matters: 'id' wins over the more specific *_id fields.
    for field in ('id', 'doc_id', 'location_id', 'type_id'):
        if hasattr(instance, field):
            return f"{instance.__class__.__name__}:{getattr(instance, field)}"
    return f"{instance.__class__.__name__}:default"
async def add_node_and_edge(client, parent_id: Optional[str], node_id: str, node_data: dict, relationship_data: dict):
    """Insert ``node_id`` with ``node_data``; when a parent is given, also
    link ``parent_id -> node_id`` using ``relationship_data`` as edge
    properties."""
    await client.add_node(node_id, **node_data)
    if not parent_id:
        return
    await client.add_edge(parent_id, node_id, **relationship_data)
async def process_attribute(G, parent_id: Optional[str], attribute: str, value: Any):
    """Recursively mirror a pydantic attribute into the graph.

    A ``BaseModel`` value becomes a node (linked to its parent via the
    model's ``default_relationship``, when present) and its own attributes
    are processed in turn; a list of models processes each element against
    the same parent. Everything else is ignored.
    """
    if isinstance(value, BaseModel):
        current_id = await generate_node_id(value)
        payload = value.dict(exclude={'default_relationship'})
        if hasattr(value, 'default_relationship'):
            edge_props = value.default_relationship.dict()
        else:
            edge_props = {}
        await add_node_and_edge(G, parent_id, current_id, payload, edge_props)
        # Descend into the model's own attributes so nested models get nodes too.
        for child_name, child_value in value.__dict__.items():
            await process_attribute(G, current_id, child_name, child_value)
    elif isinstance(value, list) and all(isinstance(element, BaseModel) for element in value):
        for element in value:
            await process_attribute(G, parent_id, attribute, element)
async def create_dynamic(graph_model, client):
    """Populate ``client``'s graph from a pydantic graph model.

    Loads any previously persisted graph, adds the model itself as the root
    node, then walks every attribute via ``process_attribute`` so nested
    models become nodes/edges.

    Returns:
        The same ``client``, now holding the populated graph.
    """
    await client.load_graph_from_file()
    root_id = await generate_node_id(graph_model)
    # The root's own id and relationship metadata are not node properties.
    root_data = graph_model.dict(exclude={'default_relationship', 'id'})
    await client.add_node(root_id, **root_data)
    for attribute_name, attribute_value in graph_model:
        await process_attribute(client, root_id, attribute_name, attribute_value)
    return client
async def create_semantic_graph():
    """Build a demo semantic graph from a ``DefaultGraphModel`` fixture.

    Selects the NetworkX backend, loads any persisted graph, then projects
    the model (user, documents, categories, location) into nodes/edges via
    ``create_dynamic``.

    Returns:
        The populated graph client.
    """
    graph_type = GraphDBType.NETWORKX
    graph_client = get_graph_client(graph_type)
    await graph_client.load_graph_from_file()

    graph_model_instance = DefaultGraphModel(
        id="user123",
        documents=[
            Document(
                doc_id="doc1",
                title="Document 1",
                summary="Summary of Document 1",
                content_id="content_id_for_doc1",  # id into external content storage — TODO confirm
                doc_type=DocumentType(type_id="PDF", description="Portable Document Format"),
                categories=[
                    Category(category_id="finance", name="Finance",
                             default_relationship=Relationship(type="belongs_to")),
                    Category(category_id="tech", name="Technology",
                             default_relationship=Relationship(type="belongs_to"))
                ],
                default_relationship=Relationship(type='has_document')
            ),
            Document(
                doc_id="doc2",
                title="Document 2",
                summary="Summary of Document 2",
                content_id="content_id_for_doc2",
                doc_type=DocumentType(type_id="TXT", description="Text File"),
                categories=[
                    Category(category_id="health", name="Health",
                             default_relationship=Relationship(type="belongs_to")),
                    Category(category_id="wellness", name="Wellness",
                             default_relationship=Relationship(type="belongs_to"))
                ],
                default_relationship=Relationship(type='has_document')
            )
        ],
        user_properties=UserProperties(
            custom_properties={"age": "30"},
            location=UserLocation(location_id="ny", description="New York",
                                  default_relationship=Relationship(type='located_in'))
        ),
        default_fields={
            'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
    )
    return await create_dynamic(graph_model_instance, graph_client)
if __name__ == "__main__":
    # Ad-hoc entry point: build the demo semantic graph end to end.
    import asyncio

    asyncio.run(create_semantic_graph())

View file

@ -2,7 +2,7 @@
from pydantic import BaseModel
from typing import Type
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import ContentPrediction
from cognitive_architecture.shared.data_models import DefaultContentPrediction
from cognitive_architecture.utils import read_query_prompt
async def classify_into_categories(text_input: str, system_prompt_path:str, response_model: Type[BaseModel]):

View file

@ -2,7 +2,7 @@
from typing import Type
from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import CognitiveLayer
from cognitive_architecture.shared.data_models import DefaultCognitiveLayer
from cognitive_architecture.utils import async_render_template
async def content_to_cog_layers(filename: str,context, response_model: Type[BaseModel]):

View file

@ -1,4 +1,5 @@
""" This module is responsible for converting content to cognitive layers. """
import json
from typing import Type
from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
@ -10,7 +11,13 @@ async def generate_graph(text_input:str, filename: str,context, response_model:
llm_client = get_llm_client()
formatted_text_input = await async_render_template(filename, context)
return await llm_client.acreate_structured_output(text_input,formatted_text_input, response_model)
output = await llm_client.acreate_structured_output(text_input, formatted_text_input, response_model)
context_key = json.dumps(context, sort_keys=True)
# Returning a dictionary with context as the key and the awaited output as its value
return {context_key: output}
if __name__ == "__main__":

View file

@ -1,6 +1,6 @@
"""Data models for the cognitive architecture."""
from enum import Enum
from typing import Optional, List, Union
from enum import Enum, auto
from typing import Optional, List, Union, Dict, Any
from pydantic import BaseModel, Field
@ -161,7 +161,7 @@ class ProceduralContent(ContentType):
type:str = "PROCEDURAL"
subclass: List[ProceduralSubclass]
class DefaultContentPrediction(BaseModel):
    """Class for a single class label prediction."""
    # One of the concrete content-type models; the populated variant carries the prediction.
    label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent]
@ -174,8 +174,53 @@ class CognitiveLayerSubgroup(BaseModel):
description: str
class DefaultCognitiveLayer(BaseModel):
    """Cognitive layer: a named category with its subgroup layers."""
    category_name: str
    # Field(..., default_factory=list) is invalid in pydantic — the required
    # marker `...` cannot be combined with a default_factory; the factory
    # alone supplies [] when the field is omitted.
    cognitive_layers: List[CognitiveLayerSubgroup] = Field(default_factory=list)
class GraphDBType(Enum):
    """Supported graph database backends."""
    NETWORKX = auto()
    NEO4J = auto()
# Models for representing different graph entities.

class Relationship(BaseModel):
    """A typed edge, optionally carrying extra properties."""
    type: str
    properties: Optional[Dict[str, Any]] = None


class DocumentType(BaseModel):
    """Kind of a document (e.g. PDF, TXT)."""
    type_id: str
    description: str
    default_relationship: Relationship = Relationship(type='is_type')


class Category(BaseModel):
    """A content category a document can belong to."""
    category_id: str
    name: str
    default_relationship: Relationship = Relationship(type='categorized_as')


class Document(BaseModel):
    """A document node; content itself lives elsewhere under ``content_id``."""
    doc_id: str
    title: str
    summary: Optional[str] = None
    content_id: Optional[str] = None
    doc_type: Optional[DocumentType] = None
    categories: List[Category] = []
    default_relationship: Relationship = Relationship(type='has_document')


class UserLocation(BaseModel):
    """Where a user is located."""
    location_id: str
    description: str
    default_relationship: Relationship = Relationship(type='located_in')


class UserProperties(BaseModel):
    """Free-form user properties plus an optional location."""
    custom_properties: Optional[Dict[str, Any]] = None
    location: Optional[UserLocation] = None


class DefaultGraphModel(BaseModel):
    """Root model: a user with documents, properties and default node fields."""
    id: str
    user_properties: UserProperties = UserProperties()
    documents: List[Document] = []
    default_fields: Optional[Dict[str, Any]] = {}