Prepare for the presentation, add info

This commit is contained in:
Vasilije 2024-03-09 17:40:51 +01:00
parent 66b40dfcd0
commit 9ad22edb15
10 changed files with 1614 additions and 1240 deletions

File diff suppressed because one or more lines are too long

View file

@ -43,7 +43,7 @@ class Config:
graph_filename = os.getenv("GRAPH_NAME", "cognee_graph.pkl") graph_filename = os.getenv("GRAPH_NAME", "cognee_graph.pkl")
# Model parameters # Model parameters
model: str = "gpt-4-1106-preview" model: str = "gpt-4-0125-preview"
model_endpoint: str = "openai" model_endpoint: str = "openai"
openai_key: Optional[str] = os.getenv("OPENAI_API_KEY") openai_key: Optional[str] = os.getenv("OPENAI_API_KEY")
openai_temperature: float = float(os.getenv("OPENAI_TEMPERATURE", 0.0)) openai_temperature: float = float(os.getenv("OPENAI_TEMPERATURE", 0.0))

View file

@ -3,23 +3,20 @@ from typing import Type
from cognitive_architecture.config import Config from cognitive_architecture.config import Config
from .graph_db_interface import GraphDBInterface from .graph_db_interface import GraphDBInterface
from .networkx.adapter import NetworXAdapter from .networkx.adapter import NetworXAdapter
# Assuming Neo4jAdapter is defined somewhere
# from .neo4j.adapter import Neo4jAdapter # from .neo4j.adapter import Neo4jAdapter
from enum import Enum, auto from enum import Enum, auto
from cognitive_architecture.shared.data_models import GraphDBType
config = Config() config = Config()
config.load() config.load()
class GraphDBType(Enum):
NETWORKX = auto() def get_graph_client(graph_type: GraphDBType, graph_filename: str=None) -> GraphDBInterface :
NEO4J = auto()
def get_graph_client(graph_type: GraphDBType, graph_filename: str) -> Type[GraphDBInterface]:
"""Factory function to get the appropriate graph client based on the graph type.""" """Factory function to get the appropriate graph client based on the graph type."""
if graph_filename is not None: if graph_filename is None:
config.graph_filename = graph_filename graph_filename= config.graph_filename
if graph_type == GraphDBType.NETWORKX: if graph_type == GraphDBType.NETWORKX:
return NetworXAdapter(filename = config.graph_filename) # Adjust as needed for NetworkX adapter configuration return NetworXAdapter(filename = graph_filename)
elif graph_type == GraphDBType.NEO4J: elif graph_type == GraphDBType.NEO4J:
# return Neo4jAdapter(config.neo4j_config) # Uncomment and adjust as needed for Neo4j adapter configuration # return Neo4jAdapter(config.neo4j_config) # Uncomment and adjust as needed for Neo4j adapter configuration
raise NotImplementedError("Neo4j adapter is not implemented yet.") raise NotImplementedError("Neo4j adapter is not implemented yet.")

View file

@ -7,33 +7,56 @@ class GraphDBInterface(Protocol):
""" Save and Load Graphs """ """ Save and Load Graphs """
@abstractmethod @abstractmethod
async def save_graph( async def graph(self):
raise NotImplementedError
@abstractmethod
async def save_graph_to_file(
self, self,
path: str file_path: str = None
): raise NotImplementedError ): raise NotImplementedError
@abstractmethod @abstractmethod
async def load_graph( async def load_graph_from_file(
self, self,
path: str file_path: str = None
): raise NotImplementedError ): raise NotImplementedError
@abstractmethod @abstractmethod
async def delete_graph( async def delete_graph_from_file(
self, self,
path: str path: str = None
): raise NotImplementedError ): raise NotImplementedError
""" CRUD operations on graph nodes """ """ CRUD operations on graph nodes """
@abstractmethod @abstractmethod
async def create(self,
user_id:str, async def add_node(
custom_user_properties:str, self,
required_layers:list, id: str,
default_fields:dict **kwargs
): raise NotImplementedError ): raise NotImplementedError
@abstractmethod
async def delete_node(
self,
id: str
): raise NotImplementedError
""" CRUD operations on graph edges """
@abstractmethod
async def add_edge(
self,
from_node: str,
to_node: str,
**kwargs
): raise NotImplementedError
# @abstractmethod # @abstractmethod
# async def create_vector_index( # async def create_vector_index(
# self, # self,
@ -48,13 +71,13 @@ class GraphDBInterface(Protocol):
# vector_index_config: object # vector_index_config: object
# ): raise NotImplementedError # ): raise NotImplementedError
""" Data points """ # """ Data points """
@abstractmethod # @abstractmethod
async def create_data_points( # async def create_data_points(
self, # self,
collection_name: str, # collection_name: str,
data_points: List[any] # data_points: List[any]
): raise NotImplementedError # ): raise NotImplementedError
# @abstractmethod # @abstractmethod
# async def get_data_point( # async def get_data_point(

View file

@ -1,91 +1,177 @@
"""Adapter for NetworkX graph database."""
import json
import os
import pickle import pickle
from datetime import datetime from datetime import datetime
from typing import Optional, Dict, Any
import aiofiles.os
import aiofiles import aiofiles
import networkx as nx import networkx as nx
from cognitive_architecture.infrastructure.databases.graph.graph_db_interface import GraphDBInterface from cognitive_architecture.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
import logging import logging
class NetworXAdapter(GraphDBInterface): class NetworXAdapter(GraphDBInterface):
_instance = None # Class variable to store the singleton instance
def __new__(cls, *args, **kwargs):
if cls._instance is None:
cls._instance = super(NetworXAdapter, cls).__new__(cls)
return cls._instance
def __init__(self, filename="cognee_graph.pkl"): def __init__(self, filename="cognee_graph.pkl"):
self.filename = filename self.filename = filename
self.graph = nx.MultiDiGraph() self.graph = nx.MultiDiGraph()
async def graph(self):
return self.graph
# G = await client.load_graph_from_file()
# if G is None:
# G = client.graph # Directly access the graph attribute without calling it
# return G
async def save_graph(self, path: str):
"""Asynchronously save the graph to a file.""" async def add_node(self, id: str, **kwargs) -> None:
if path is not None: """Asynchronously add a node to the graph if it doesn't already exist, with given properties."""
path = self.filename if not self.graph.has_node(id):
self.graph.add_node(id, **kwargs)
await self.save_graph_to_file(self.filename)
async def add_edge(self, from_node: str, to_node: str, **kwargs ) -> None:
"""Asynchronously add an edge between two nodes with optional properties."""
# properties = properties or {}
self.graph.add_edge(from_node, to_node, **kwargs)
await self.save_graph_to_file(self.filename)
async def delete_node(self, id: str) -> None:
"""Asynchronously delete a node from the graph if it exists."""
if self.graph.has_node(id):
self.graph.remove_node(id)
await self.save_graph_to_file(self.filename)
async def save_graph_to_file(self, file_path: str=None) -> None:
"""Asynchronously save the graph to a file in JSON format."""
if not file_path:
file_path = self.filename
graph_data = nx.readwrite.json_graph.node_link_data(self.graph)
async with aiofiles.open(file_path, 'w') as file:
await file.write(json.dumps(graph_data))
async def load_graph_from_file(self, file_path: str = None):
"""Asynchronously load the graph from a file in JSON format."""
if not file_path:
file_path = self.filename
try: try:
async with aiofiles.open(path, "wb") as f: if os.path.exists(file_path):
await f.write(pickle.dumps(self.graph)) async with aiofiles.open(file_path, 'r') as file:
logging.info("Graph saved successfully.") graph_data = json.loads(await file.read())
self.graph = nx.readwrite.json_graph.node_link_graph(graph_data)
return self.graph
else:
# Log that the file does not exist and an empty graph is initialized
logging.warning(f"File {file_path} not found. Initializing an empty graph.")
self.graph = nx.MultiDiGraph() # Use MultiDiGraph to keep it consistent with __init__
return self.graph
except Exception as e: except Exception as e:
logging.error(f"Failed to save graph: {e}") logging.error(f"Failed to load graph from {file_path}: {e}")
# Consider initializing an empty graph in case of error
self.graph = nx.MultiDiGraph()
return self.graph
async def load_graph(self, path: str): async def delete_graph_from_file(self, path: str = None):
if path is not None: """Asynchronously delete the graph file from the filesystem."""
path = self.filename if path is None:
path = self.filename # Assuming self.filename is defined elsewhere and holds the default graph file path
try: try:
async with aiofiles.open(path, "rb") as f: await aiofiles.os.remove(path) # Asynchronously remove the file
data = await f.read()
self.graph = pickle.loads(data)
logging.info("Graph loaded successfully.")
except Exception as e:
logging.error(f"Failed to load graph: {e}")
async def delete_graph(self, path: str):
if path is not None:
path = self.filename
try:
async with aiofiles.open(path, "wb") as f:
await f.write(pickle.dumps(self.graph))
logging.info("Graph deleted successfully.") logging.info("Graph deleted successfully.")
except Exception as e: except Exception as e:
logging.error(f"Failed to delete graph: {e}") logging.error(f"Failed to delete graph: {e}")
async def create(self, user_id, custom_user_properties=None, required_layers=None, default_fields=None): # async def create(self, user_id, custom_user_properties=None, required_layers=None, default_fields=None, existing_graph=None):
"""Asynchronously create or update a user content graph based on given parameters.""" # """Asynchronously create or update a user content graph based on given parameters."""
# Assume required_layers is a dictionary-like object; use more robust validation in production # # Assume required_layers is a dictionary-like object; use more robust validation in production
category_name = required_layers['name'] # category_name = required_layers['context_name']
subgroup_names = [subgroup['name'] for subgroup in required_layers['cognitive_subgroups']] # subgroup_names = [required_layers['layer_name']]
#
# Construct the additional_categories structure # # Construct the additional_categories structure
additional_categories = {category_name: subgroup_names} # additional_categories = {category_name: subgroup_names}
#
# Define default fields for all nodes if not provided # # Define default fields for all nodes if not provided
if default_fields is None: # if default_fields is None:
default_fields = { # default_fields = {
'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), # 'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S") # 'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
} # }
#
# Merge custom user properties with default fields; custom properties take precedence # # Merge custom user properties with default fields; custom properties take precedence
user_properties = {**default_fields, **(custom_user_properties or {})} # user_properties = {**default_fields, **(custom_user_properties or {})}
#
# Default content categories and update with any additional categories provided # # Default content categories and update with any additional categories provided
content_categories = { # content_categories = {
"Temporal": ["Historical events", "Schedules and timelines"], # "Temporal": ["Historical events", "Schedules and timelines"],
"Positional": ["Geographical locations", "Spatial data"], # "Positional": ["Geographical locations", "Spatial data"],
"Propositions": ["Hypotheses and theories", "Claims and arguments"], # "Propositions": ["Hypotheses and theories", "Claims and arguments"],
"Personalization": ["User preferences", "User information"] # "Personalization": ["User preferences", "User information"]
} # }
content_categories.update(additional_categories) #
# content_categories = {
# Ensure the user node exists with properties # "Temporal": ["Historical events", "Schedules and timelines"],
self.graph.add_node(user_id, **user_properties, exist=True) # "Positional": ["Geographical locations", "Spatial data"],
# "Propositions": ["Hypotheses and theories", "Claims and arguments"],
# Add or update content category nodes and their edges # "Personalization": ["User preferences", "User information"]
for category, subclasses in content_categories.items(): # }
category_properties = {**default_fields, 'type': 'category'} #
self.graph.add_node(category, **category_properties, exist=True) # # Update content categories with any additional categories provided
self.graph.add_edge(user_id, category, relationship='created') # if additional_categories:
# content_categories.update(additional_categories)
# Add or update subclass nodes and their edges #
for subclass in subclasses: # G = existing_graph if existing_graph else self.graph
subclass_node_id = f"{category}:{subclass}" #
subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass} # # Check if the user node already exists, if not, add the user node with properties
self.graph.add_node(subclass_node_id, **subclass_properties, exist=True) # if not G.has_node(user_id):
self.graph.add_edge(category, subclass_node_id, relationship='includes') # G.add_node(user_id, **user_properties)
#
# Save the graph asynchronously after modifications # # Add or update content category nodes and their edges
await self.save_graph() # for category, subclasses in content_categories.items():
# category_properties = {**default_fields, 'type': 'category'}
#
# # Add or update the category node
# if not G.has_node(category):
# G.add_node(category, **category_properties)
# G.add_edge(user_id, category, relationship='created')
#
# # Add or update subclass nodes and their edges
# for subclass in subclasses:
# # Using both category and subclass names to ensure uniqueness within categories
# subclass_node_id = f"{category}:{subclass}"
#
# # Check if subclass node exists before adding, based on node content
# if not any(subclass == data.get('content') for _, data in G.nodes(data=True)):
# subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}
# G.add_node(subclass_node_id, **subclass_properties)
# G.add_edge(category, subclass_node_id, relationship='includes')
#
# return G
# content_categories.update(additional_categories)
#
# # Ensure the user node exists with properties
# self.graph.add_node(user_id, **user_properties, exist=True)
#
# # Add or update content category nodes and their edges
# for category, subclasses in content_categories.items():
# category_properties = {**default_fields, 'type': 'category'}
# self.graph.add_node(category, **category_properties, exist=True)
# self.graph.add_edge(user_id, category, relationship='created')
#
# # Add or update subclass nodes and their edges
# for subclass in subclasses:
# subclass_node_id = f"{category}:{subclass}"
# subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}
# self.graph.add_node(subclass_node_id, **subclass_properties, exist=True)
# self.graph.add_edge(category, subclass_node_id, relationship='includes')
#
# # Save the graph asynchronously after modifications
# # await self.save_graph()
#
# return self.graph

View file

@ -1,17 +1,146 @@
from cognitive_architecture.infrastructure.graph.get_graph_client import get_graph_client """ This module is responsible for creating a semantic graph """
from datetime import datetime
from enum import Enum, auto
from typing import Type, Optional, Any
from pydantic import BaseModel
from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognitive_architecture.shared.data_models import GraphDBType, DefaultGraphModel, Document, DocumentType, Category, Relationship, UserProperties, UserLocation
def create_semantic_graph( async def generate_node_id(instance: BaseModel) -> str:
text_input: str, for field in ['id', 'doc_id', 'location_id', 'type_id']:
filename: str, if hasattr(instance, field):
context, return f"{instance.__class__.__name__}:{getattr(instance, field)}"
response_model: Type[BaseModel] return f"{instance.__class__.__name__}:default"
) -> KnowledgeGraph: async def add_node_and_edge(client, parent_id: Optional[str], node_id: str, node_data: dict, relationship_data: dict):
graph_type = GraphDBType.NEO4J await client.add_node(node_id, **node_data) # Add the current node with its data
if parent_id:
# Add an edge between the parent node and the current node with the correct relationship data
await client.add_edge(parent_id, node_id, **relationship_data)
async def process_attribute(G, parent_id: Optional[str], attribute: str, value: Any):
if isinstance(value, BaseModel):
node_id = await generate_node_id(value)
node_data = value.dict(exclude={'default_relationship'})
# Use the specified default relationship for the edge between the parent node and the current node
relationship_data = value.default_relationship.dict() if hasattr(value, 'default_relationship') else {}
await add_node_and_edge(G, parent_id, node_id, node_data, relationship_data)
# Recursively process nested attributes to ensure all nodes and relationships are added to the graph
for sub_attr, sub_val in value.__dict__.items(): # Access attributes and their values directly
await process_attribute(G, node_id, sub_attr, sub_val)
elif isinstance(value, list) and all(isinstance(item, BaseModel) for item in value):
# For lists of BaseModel instances, process each item in the list
for item in value:
await process_attribute(G, parent_id, attribute, item)
async def create_dynamic(graph_model, client) :
await client.load_graph_from_file()
root_id = await generate_node_id(graph_model)
node_data = graph_model.dict(exclude={'default_relationship', 'id'})
print(node_data)
await client.add_node(root_id, **node_data)
for attribute_name, attribute_value in graph_model:
await process_attribute(client, root_id, attribute_name, attribute_value)
return client
async def create_semantic_graph(
):
graph_type = GraphDBType.NETWORKX
# Call the get_graph_client function with the selected graph type # Call the get_graph_client function with the selected graph type
graph_client = get_graph_client(graph_type) graph_client = get_graph_client(graph_type)
GraphDBInterface print(graph_client)
await graph_client.load_graph_from_file()
#
#
#
# b = await graph_client.add_node("23ds", {
# 'username': 'exampleUser',
# 'email': 'user@example.com'
# })
#
# await graph_client.save_graph_to_file(b)
graph_model_instance = DefaultGraphModel(
id="user123",
documents=[
Document(
doc_id="doc1",
title="Document 1",
summary="Summary of Document 1",
content_id="content_id_for_doc1", # Assuming external content storage ID
doc_type=DocumentType(type_id="PDF", description="Portable Document Format"),
categories=[
Category(category_id="finance", name="Finance",
default_relationship=Relationship(type="belongs_to")),
Category(category_id="tech", name="Technology",
default_relationship=Relationship(type="belongs_to"))
],
default_relationship=Relationship(type='has_document')
),
Document(
doc_id="doc2",
title="Document 2",
summary="Summary of Document 2",
content_id="content_id_for_doc2",
doc_type=DocumentType(type_id="TXT", description="Text File"),
categories=[
Category(category_id="health", name="Health", default_relationship=Relationship(type="belongs_to")),
Category(category_id="wellness", name="Wellness",
default_relationship=Relationship(type="belongs_to"))
],
default_relationship=Relationship(type='has_document')
)
],
user_properties=UserProperties(
custom_properties={"age": "30"},
location=UserLocation(location_id="ny", description="New York",
default_relationship=Relationship(type='located_in'))
),
default_fields={
'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}
)
G = await create_dynamic(graph_model_instance, graph_client)
# print("Nodes and their data:")
# for node, data in G.graph.nodes(data=True):
# print(node, data)
#
# # Print edges with their data
# print("\nEdges and their data:")
# for source, target, data in G.graph.edges(data=True):
# print(f"{source} -> {target} {data}")
# print(G)
# return await graph_client.create( user_id = user_id, custom_user_properties=custom_user_properties, required_layers=required_layers, default_fields=default_fields, existing_graph=existing_graph)
if __name__ == "__main__":
import asyncio
user_id = 'user123'
custom_user_properties = {
'username': 'exampleUser',
'email': 'user@example.com'
}
asyncio.run(create_semantic_graph())

View file

@ -2,7 +2,7 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import Type from typing import Type
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import ContentPrediction from cognitive_architecture.shared.data_models import DefaultContentPrediction
from cognitive_architecture.utils import read_query_prompt from cognitive_architecture.utils import read_query_prompt
async def classify_into_categories(text_input: str, system_prompt_path:str, response_model: Type[BaseModel]): async def classify_into_categories(text_input: str, system_prompt_path:str, response_model: Type[BaseModel]):

View file

@ -2,7 +2,7 @@
from typing import Type from typing import Type
from pydantic import BaseModel from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
from cognitive_architecture.shared.data_models import CognitiveLayer from cognitive_architecture.shared.data_models import DefaultCognitiveLayer
from cognitive_architecture.utils import async_render_template from cognitive_architecture.utils import async_render_template
async def content_to_cog_layers(filename: str,context, response_model: Type[BaseModel]): async def content_to_cog_layers(filename: str,context, response_model: Type[BaseModel]):

View file

@ -1,4 +1,5 @@
""" This module is responsible for converting content to cognitive layers. """ """ This module is responsible for converting content to cognitive layers. """
import json
from typing import Type from typing import Type
from pydantic import BaseModel from pydantic import BaseModel
from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
@ -10,7 +11,13 @@ async def generate_graph(text_input:str, filename: str,context, response_model:
llm_client = get_llm_client() llm_client = get_llm_client()
formatted_text_input = await async_render_template(filename, context) formatted_text_input = await async_render_template(filename, context)
return await llm_client.acreate_structured_output(text_input,formatted_text_input, response_model) output = await llm_client.acreate_structured_output(text_input, formatted_text_input, response_model)
context_key = json.dumps(context, sort_keys=True)
# Returning a dictionary with context as the key and the awaited output as its value
return {context_key: output}
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,6 +1,6 @@
"""Data models for the cognitive architecture.""" """Data models for the cognitive architecture."""
from enum import Enum from enum import Enum, auto
from typing import Optional, List, Union from typing import Optional, List, Union, Dict, Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
@ -161,7 +161,7 @@ class ProceduralContent(ContentType):
type:str = "PROCEDURAL" type:str = "PROCEDURAL"
subclass: List[ProceduralSubclass] subclass: List[ProceduralSubclass]
class ContentPrediction(BaseModel): class DefaultContentPrediction(BaseModel):
"""Class for a single class label prediction.""" """Class for a single class label prediction."""
label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent] label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent]
@ -174,8 +174,53 @@ class CognitiveLayerSubgroup(BaseModel):
description: str description: str
class CognitiveLayer(BaseModel): class DefaultCognitiveLayer(BaseModel):
"""Cognitive layer""" """Cognitive layer"""
category_name:str category_name:str
cognitive_layers: List[CognitiveLayerSubgroup] = Field(..., default_factory=list) cognitive_layers: List[CognitiveLayerSubgroup] = Field(..., default_factory=list)
class GraphDBType(Enum):
NETWORKX = auto()
NEO4J = auto()
# Models for representing different entities
class Relationship(BaseModel):
type: str
properties: Optional[Dict[str, Any]] = None
class DocumentType(BaseModel):
type_id: str
description: str
default_relationship: Relationship = Relationship(type='is_type')
class Category(BaseModel):
category_id: str
name: str
default_relationship: Relationship = Relationship(type='categorized_as')
class Document(BaseModel):
doc_id: str
title: str
summary: Optional[str] = None
content_id: Optional[str] = None
doc_type: Optional[DocumentType] = None
categories: List[Category] = []
default_relationship: Relationship = Relationship(type='has_document')
class UserLocation(BaseModel):
location_id: str
description: str
default_relationship: Relationship = Relationship(type='located_in')
class UserProperties(BaseModel):
custom_properties: Optional[Dict[str, Any]] = None
location: Optional[UserLocation] = None
class DefaultGraphModel(BaseModel):
id: str
user_properties: UserProperties = UserProperties()
documents: List[Document] = []
default_fields: Optional[Dict[str, Any]] = {}