Prepare for the presentation, add info

2024-03-09 17:40:51 +01:00 · 2024-03-09 17:40:51 +01:00 · 9ad22edb15
commit 9ad22edb15
parent 66b40dfcd0
10 changed files with 1614 additions and 1240 deletions
--- a/Demo_graph.ipynb
+++ b/Demo_graph.ipynb
--- a/cognitive_architecture/config.py
+++ b/cognitive_architecture/config.py
@ -43,7 +43,7 @@ class Config:
    graph_filename = os.getenv("GRAPH_NAME", "cognee_graph.pkl")
    # Model parameters
-    model: str = "gpt-4-1106-preview"
+    model: str = "gpt-4-0125-preview"
    model_endpoint: str = "openai"
    openai_key: Optional[str] = os.getenv("OPENAI_API_KEY")
    openai_temperature: float = float(os.getenv("OPENAI_TEMPERATURE", 0.0))
--- a/cognitive_architecture/infrastructure/databases/graph/get_graph_client.py
+++ b/cognitive_architecture/infrastructure/databases/graph/get_graph_client.py
@ -3,23 +3,20 @@ from typing import Type
 from cognitive_architecture.config import Config
 from .graph_db_interface import GraphDBInterface
 from .networkx.adapter import NetworXAdapter
 # Assuming Neo4jAdapter is defined somewhere
 # from .neo4j.adapter import Neo4jAdapter
 from enum import Enum, auto
-
+from cognitive_architecture.shared.data_models import GraphDBType
 config = Config()
 config.load()
-class GraphDBType(Enum):
+
-    NETWORKX = auto()
+def get_graph_client(graph_type: GraphDBType, graph_filename: str=None) -> GraphDBInterface :
    NEO4J = auto()
 def get_graph_client(graph_type: GraphDBType, graph_filename: str) -> Type[GraphDBInterface]:
    """Factory function to get the appropriate graph client based on the graph type."""
-    if graph_filename is not None:
+    if graph_filename is None:
-        config.graph_filename = graph_filename
+        graph_filename= config.graph_filename
    if graph_type == GraphDBType.NETWORKX:
-        return NetworXAdapter(filename = config.graph_filename)  # Adjust as needed for NetworkX adapter configuration
+        return  NetworXAdapter(filename = graph_filename)
    elif graph_type == GraphDBType.NEO4J:
        # return Neo4jAdapter(config.neo4j_config)  # Uncomment and adjust as needed for Neo4j adapter configuration
        raise NotImplementedError("Neo4j adapter is not implemented yet.")
--- a/cognitive_architecture/infrastructure/databases/graph/graph_db_interface.py
+++ b/cognitive_architecture/infrastructure/databases/graph/graph_db_interface.py
@ -7,33 +7,56 @@ class GraphDBInterface(Protocol):
    """ Save and Load Graphs """
    @abstractmethod
-    async def save_graph(
+    async def graph(self):
        raise NotImplementedError
    @abstractmethod
    async def save_graph_to_file(
        self,
-        path: str
+        file_path: str = None
    ): raise NotImplementedError
    @abstractmethod
-    async def load_graph(
+    async def load_graph_from_file(
        self,
-        path: str
+        file_path: str = None
    ): raise NotImplementedError
    @abstractmethod
-    async def delete_graph(
+    async def delete_graph_from_file(
        self,
-        path: str
+        path: str = None
    ): raise NotImplementedError
    """ CRUD operations on graph nodes """
    @abstractmethod
-    async def create(self,
+
-                     user_id:str,
+    async def add_node(
-                     custom_user_properties:str,
+        self,
-                     required_layers:list,
+        id: str,
-                     default_fields:dict
+        **kwargs
    ): raise NotImplementedError
    @abstractmethod
    async def delete_node(
        self,
        id: str
    ): raise NotImplementedError
    """ CRUD operations on graph edges """
    @abstractmethod
    async def add_edge(
        self,
        from_node: str,
        to_node: str,
        **kwargs
    ): raise NotImplementedError
    # @abstractmethod
    # async def create_vector_index(
    #     self,
@ -48,13 +71,13 @@ class GraphDBInterface(Protocol):
    #     vector_index_config: object
    # ): raise NotImplementedError
-    """ Data points """
+    # """ Data points """
-    @abstractmethod
+    # @abstractmethod
-    async def create_data_points(
+    # async def create_data_points(
-        self,
+    #     self,
-        collection_name: str,
+    #     collection_name: str,
-        data_points: List[any]
+    #     data_points: List[any]
-    ): raise NotImplementedError
+    # ): raise NotImplementedError
    # @abstractmethod
    # async def get_data_point(
--- a/cognitive_architecture/infrastructure/databases/graph/networkx/adapter.py
+++ b/cognitive_architecture/infrastructure/databases/graph/networkx/adapter.py
@ -1,91 +1,177 @@
 """Adapter for NetworkX graph database."""
 import json
 import os
 import pickle
 from datetime import datetime
 from typing import Optional, Dict, Any
 import aiofiles.os
 import aiofiles
 import networkx as nx
 from cognitive_architecture.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
 import logging
 class NetworXAdapter(GraphDBInterface):
    _instance = None  # Class variable to store the singleton instance
    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super(NetworXAdapter, cls).__new__(cls)
        return cls._instance
    def __init__(self, filename="cognee_graph.pkl"):
        self.filename = filename
        self.graph = nx.MultiDiGraph()
    async def graph(self):
        return self.graph
        # G = await client.load_graph_from_file()
        # if G is None:
        #     G = client.graph  # Directly access the graph attribute without calling it
        # return G
-    async def save_graph(self, path: str):
+
-        """Asynchronously save the graph to a file."""
+    async def add_node(self, id: str, **kwargs) -> None:
-        if path is not None:
+        """Asynchronously add a node to the graph if it doesn't already exist, with given properties."""
-            path = self.filename
+        if not self.graph.has_node(id):
            self.graph.add_node(id, **kwargs)
            await self.save_graph_to_file(self.filename)
    async def add_edge(self, from_node: str, to_node: str, **kwargs ) -> None:
        """Asynchronously add an edge between two nodes with optional properties."""
        # properties = properties or {}
        self.graph.add_edge(from_node, to_node, **kwargs)
        await self.save_graph_to_file(self.filename)
    async def delete_node(self, id: str) -> None:
        """Asynchronously delete a node from the graph if it exists."""
        if self.graph.has_node(id):
            self.graph.remove_node(id)
            await self.save_graph_to_file(self.filename)
    async def save_graph_to_file(self, file_path: str=None) -> None:
        """Asynchronously save the graph to a file in JSON format."""
        if not file_path:
            file_path = self.filename
        graph_data = nx.readwrite.json_graph.node_link_data(self.graph)
        async with aiofiles.open(file_path, 'w') as file:
            await file.write(json.dumps(graph_data))
    async def load_graph_from_file(self, file_path: str = None):
        """Asynchronously load the graph from a file in JSON format."""
        if not file_path:
            file_path = self.filename
        try:
-            async with aiofiles.open(path, "wb") as f:
+            if os.path.exists(file_path):
-                await f.write(pickle.dumps(self.graph))
+                async with aiofiles.open(file_path, 'r') as file:
-            logging.info("Graph saved successfully.")
+                    graph_data = json.loads(await file.read())
                    self.graph = nx.readwrite.json_graph.node_link_graph(graph_data)
                    return self.graph
            else:
                # Log that the file does not exist and an empty graph is initialized
                logging.warning(f"File {file_path} not found. Initializing an empty graph.")
                self.graph = nx.MultiDiGraph()  # Use MultiDiGraph to keep it consistent with __init__
                return self.graph
        except Exception as e:
-            logging.error(f"Failed to save graph: {e}")
+            logging.error(f"Failed to load graph from {file_path}: {e}")
            # Consider initializing an empty graph in case of error
            self.graph = nx.MultiDiGraph()
            return self.graph
-    async def load_graph(self, path: str):
+    async def delete_graph_from_file(self, path: str = None):
-        if path is not None:
+        """Asynchronously delete the graph file from the filesystem."""
-            path = self.filename
+        if path is None:
            path = self.filename  # Assuming self.filename is defined elsewhere and holds the default graph file path
        try:
-            async with aiofiles.open(path, "rb") as f:
+            await aiofiles.os.remove(path)  # Asynchronously remove the file
                data = await f.read()
                self.graph = pickle.loads(data)
            logging.info("Graph loaded successfully.")
        except Exception as e:
            logging.error(f"Failed to load graph: {e}")
    async def delete_graph(self, path: str):
        if path is not None:
            path = self.filename
        try:
            async with aiofiles.open(path, "wb") as f:
                await f.write(pickle.dumps(self.graph))
            logging.info("Graph deleted successfully.")
        except Exception as e:
            logging.error(f"Failed to delete graph: {e}")
-    async def create(self, user_id, custom_user_properties=None, required_layers=None, default_fields=None):
+    # async def create(self, user_id, custom_user_properties=None, required_layers=None, default_fields=None, existing_graph=None):
-        """Asynchronously create or update a user content graph based on given parameters."""
+    #     """Asynchronously create or update a user content graph based on given parameters."""
-        # Assume required_layers is a dictionary-like object; use more robust validation in production
+    #     # Assume required_layers is a dictionary-like object; use more robust validation in production
-        category_name = required_layers['name']
+    #     category_name = required_layers['context_name']
-        subgroup_names = [subgroup['name'] for subgroup in required_layers['cognitive_subgroups']]
+    #     subgroup_names = [required_layers['layer_name']]
-
+    #
-        # Construct the additional_categories structure
+    #     # Construct the additional_categories structure
-        additional_categories = {category_name: subgroup_names}
+    #     additional_categories = {category_name: subgroup_names}
-
+    #
-        # Define default fields for all nodes if not provided
+    #     # Define default fields for all nodes if not provided
-        if default_fields is None:
+    #     if default_fields is None:
-            default_fields = {
+    #         default_fields = {
-                'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+    #             'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    #             'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-            }
+    #         }
-
+    #
-        # Merge custom user properties with default fields; custom properties take precedence
+    #     # Merge custom user properties with default fields; custom properties take precedence
-        user_properties = {**default_fields, **(custom_user_properties or {})}
+    #     user_properties = {**default_fields, **(custom_user_properties or {})}
-
+    #
-        # Default content categories and update with any additional categories provided
+    #     # Default content categories and update with any additional categories provided
-        content_categories = {
+    #     content_categories = {
-            "Temporal": ["Historical events", "Schedules and timelines"],
+    #         "Temporal": ["Historical events", "Schedules and timelines"],
-            "Positional": ["Geographical locations", "Spatial data"],
+    #         "Positional": ["Geographical locations", "Spatial data"],
-            "Propositions": ["Hypotheses and theories", "Claims and arguments"],
+    #         "Propositions": ["Hypotheses and theories", "Claims and arguments"],
-            "Personalization": ["User preferences", "User information"]
+    #         "Personalization": ["User preferences", "User information"]
-        }
+    #     }
-        content_categories.update(additional_categories)
+    #
-
+    #     content_categories = {
-        # Ensure the user node exists with properties
+    #         "Temporal": ["Historical events", "Schedules and timelines"],
-        self.graph.add_node(user_id, **user_properties, exist=True)
+    #         "Positional": ["Geographical locations", "Spatial data"],
-
+    #         "Propositions": ["Hypotheses and theories", "Claims and arguments"],
-        # Add or update content category nodes and their edges
+    #         "Personalization": ["User preferences", "User information"]
-        for category, subclasses in content_categories.items():
+    #     }
-            category_properties = {**default_fields, 'type': 'category'}
+    #
-            self.graph.add_node(category, **category_properties, exist=True)
+    #     # Update content categories with any additional categories provided
-            self.graph.add_edge(user_id, category, relationship='created')
+    #     if additional_categories:
-
+    #         content_categories.update(additional_categories)
-            # Add or update subclass nodes and their edges
+    #
-            for subclass in subclasses:
+    #     G = existing_graph if existing_graph else self.graph
-                subclass_node_id = f"{category}:{subclass}"
+    #
-                subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}
+    #     # Check if the user node already exists, if not, add the user node with properties
-                self.graph.add_node(subclass_node_id, **subclass_properties, exist=True)
+    #     if not G.has_node(user_id):
-                self.graph.add_edge(category, subclass_node_id, relationship='includes')
+    #         G.add_node(user_id, **user_properties)
-
+    #
-        # Save the graph asynchronously after modifications
+    #     # Add or update content category nodes and their edges
-        await self.save_graph()
+    #     for category, subclasses in content_categories.items():
    #         category_properties = {**default_fields, 'type': 'category'}
    #
    #         # Add or update the category node
    #         if not G.has_node(category):
    #             G.add_node(category, **category_properties)
    #             G.add_edge(user_id, category, relationship='created')
    #
    #         # Add or update subclass nodes and their edges
    #         for subclass in subclasses:
    #             # Using both category and subclass names to ensure uniqueness within categories
    #             subclass_node_id = f"{category}:{subclass}"
    #
    #             # Check if subclass node exists before adding, based on node content
    #             if not any(subclass == data.get('content') for _, data in G.nodes(data=True)):
    #                 subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}
    #                 G.add_node(subclass_node_id, **subclass_properties)
    #                 G.add_edge(category, subclass_node_id, relationship='includes')
    #
    #     return G
        # content_categories.update(additional_categories)
        #
        # # Ensure the user node exists with properties
        # self.graph.add_node(user_id, **user_properties, exist=True)
        #
        # # Add or update content category nodes and their edges
        # for category, subclasses in content_categories.items():
        #     category_properties = {**default_fields, 'type': 'category'}
        #     self.graph.add_node(category, **category_properties, exist=True)
        #     self.graph.add_edge(user_id, category, relationship='created')
        #
        #     # Add or update subclass nodes and their edges
        #     for subclass in subclasses:
        #         subclass_node_id = f"{category}:{subclass}"
        #         subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}
        #         self.graph.add_node(subclass_node_id, **subclass_properties, exist=True)
        #         self.graph.add_edge(category, subclass_node_id, relationship='includes')
        #
        # # Save the graph asynchronously after modifications
        # # await self.save_graph()
        #
        # return self.graph
--- a/cognitive_architecture/modules/cognify/graph/create_semantic_graph.py
+++ b/cognitive_architecture/modules/cognify/graph/create_semantic_graph.py
@ -1,17 +1,146 @@
-from cognitive_architecture.infrastructure.graph.get_graph_client import get_graph_client
+""" This module is responsible for creating a semantic graph """
 from datetime import datetime
 from enum import Enum, auto
 from typing import Type, Optional, Any
 from pydantic import BaseModel
 from cognitive_architecture.infrastructure.databases.graph.get_graph_client import get_graph_client
 from cognitive_architecture.shared.data_models import GraphDBType, DefaultGraphModel, Document, DocumentType, Category, Relationship, UserProperties, UserLocation
-def create_semantic_graph(
+async def generate_node_id(instance: BaseModel) -> str:
-    text_input: str,
+    for field in ['id', 'doc_id', 'location_id', 'type_id']:
-    filename: str,
+        if hasattr(instance, field):
-    context,
+            return f"{instance.__class__.__name__}:{getattr(instance, field)}"
-    response_model: Type[BaseModel]
+    return f"{instance.__class__.__name__}:default"
-) -> KnowledgeGraph:
+async def add_node_and_edge(client, parent_id: Optional[str], node_id: str, node_data: dict, relationship_data: dict):
-    graph_type = GraphDBType.NEO4J
+    await client.add_node(node_id, **node_data)  # Add the current node with its data
    if parent_id:
        # Add an edge between the parent node and the current node with the correct relationship data
        await client.add_edge(parent_id, node_id, **relationship_data)
 async def process_attribute(G, parent_id: Optional[str], attribute: str, value: Any):
    if isinstance(value, BaseModel):
        node_id = await generate_node_id(value)
        node_data = value.dict(exclude={'default_relationship'})
        # Use the specified default relationship for the edge between the parent node and the current node
        relationship_data = value.default_relationship.dict() if hasattr(value, 'default_relationship') else {}
        await add_node_and_edge(G, parent_id, node_id, node_data, relationship_data)
        # Recursively process nested attributes to ensure all nodes and relationships are added to the graph
        for sub_attr, sub_val in value.__dict__.items():  # Access attributes and their values directly
            await process_attribute(G, node_id, sub_attr, sub_val)
    elif isinstance(value, list) and all(isinstance(item, BaseModel) for item in value):
        # For lists of BaseModel instances, process each item in the list
        for item in value:
            await process_attribute(G, parent_id, attribute, item)
 async def create_dynamic(graph_model, client) :
    await client.load_graph_from_file()
    root_id = await generate_node_id(graph_model)
    node_data = graph_model.dict(exclude={'default_relationship', 'id'})
    print(node_data)
    await client.add_node(root_id, **node_data)
    for attribute_name, attribute_value in graph_model:
        await process_attribute(client, root_id, attribute_name, attribute_value)
    return client
 async def create_semantic_graph(
 ):
    graph_type = GraphDBType.NETWORKX
    # Call the get_graph_client function with the selected graph type
    graph_client = get_graph_client(graph_type)
-GraphDBInterface
+    print(graph_client)
    await graph_client.load_graph_from_file()
    #
    #
    #
    # b = await graph_client.add_node("23ds",     {
    #     'username': 'exampleUser',
    #     'email': 'user@example.com'
    # })
    #
    # await graph_client.save_graph_to_file(b)
    graph_model_instance = DefaultGraphModel(
        id="user123",
        documents=[
            Document(
                doc_id="doc1",
                title="Document 1",
                summary="Summary of Document 1",
                content_id="content_id_for_doc1",  # Assuming external content storage ID
                doc_type=DocumentType(type_id="PDF", description="Portable Document Format"),
                categories=[
                    Category(category_id="finance", name="Finance",
                             default_relationship=Relationship(type="belongs_to")),
                    Category(category_id="tech", name="Technology",
                             default_relationship=Relationship(type="belongs_to"))
                ],
                default_relationship=Relationship(type='has_document')
            ),
            Document(
                doc_id="doc2",
                title="Document 2",
                summary="Summary of Document 2",
                content_id="content_id_for_doc2",
                doc_type=DocumentType(type_id="TXT", description="Text File"),
                categories=[
                    Category(category_id="health", name="Health", default_relationship=Relationship(type="belongs_to")),
                    Category(category_id="wellness", name="Wellness",
                             default_relationship=Relationship(type="belongs_to"))
                ],
                default_relationship=Relationship(type='has_document')
            )
        ],
        user_properties=UserProperties(
            custom_properties={"age": "30"},
            location=UserLocation(location_id="ny", description="New York",
                                  default_relationship=Relationship(type='located_in'))
        ),
        default_fields={
            'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
    )
    G = await create_dynamic(graph_model_instance, graph_client)
    # print("Nodes and their data:")
    # for node, data in G.graph.nodes(data=True):
    #     print(node, data)
    #
    # # Print edges with their data
    # print("\nEdges and their data:")
    # for source, target, data in G.graph.edges(data=True):
    #     print(f"{source} -> {target} {data}")
    # print(G)
    # return await graph_client.create( user_id = user_id, custom_user_properties=custom_user_properties, required_layers=required_layers, default_fields=default_fields, existing_graph=existing_graph)
 if __name__ == "__main__":
    import asyncio
    user_id = 'user123'
    custom_user_properties = {
        'username': 'exampleUser',
        'email': 'user@example.com'
    }
    asyncio.run(create_semantic_graph())
--- a/cognitive_architecture/modules/cognify/llm/classify_content.py
+++ b/cognitive_architecture/modules/cognify/llm/classify_content.py
@ -2,7 +2,7 @@
 from pydantic import BaseModel
 from typing import Type
 from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
-from cognitive_architecture.shared.data_models import ContentPrediction
+from cognitive_architecture.shared.data_models import DefaultContentPrediction
 from cognitive_architecture.utils import read_query_prompt
 async def classify_into_categories(text_input: str, system_prompt_path:str, response_model: Type[BaseModel]):
--- a/cognitive_architecture/modules/cognify/llm/content_to_cog_layers.py
+++ b/cognitive_architecture/modules/cognify/llm/content_to_cog_layers.py
@ -2,7 +2,7 @@
 from typing import Type
 from pydantic import BaseModel
 from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
-from cognitive_architecture.shared.data_models import CognitiveLayer
+from cognitive_architecture.shared.data_models import DefaultCognitiveLayer
 from cognitive_architecture.utils import async_render_template
 async def content_to_cog_layers(filename: str,context, response_model: Type[BaseModel]):
--- a/cognitive_architecture/modules/cognify/llm/content_to_propositions.py
+++ b/cognitive_architecture/modules/cognify/llm/content_to_propositions.py
@ -1,4 +1,5 @@
 """ This module is responsible for converting content to cognitive layers. """
 import json
 from typing import Type
 from pydantic import BaseModel
 from cognitive_architecture.infrastructure.llm.get_llm_client import get_llm_client
@ -10,7 +11,13 @@ async def generate_graph(text_input:str, filename: str,context, response_model:
    llm_client = get_llm_client()
    formatted_text_input = await async_render_template(filename, context)
-    return await llm_client.acreate_structured_output(text_input,formatted_text_input, response_model)
+    output = await llm_client.acreate_structured_output(text_input, formatted_text_input, response_model)
    context_key = json.dumps(context, sort_keys=True)
    # Returning a dictionary with context as the key and the awaited output as its value
    return {context_key: output}
 if __name__ == "__main__":
--- a/cognitive_architecture/shared/data_models.py
+++ b/cognitive_architecture/shared/data_models.py
@ -1,6 +1,6 @@
 """Data models for the cognitive architecture."""
-from enum import Enum
+from enum import Enum, auto
-from typing import Optional, List, Union
+from typing import Optional, List, Union, Dict, Any
 from pydantic import BaseModel, Field
@ -161,7 +161,7 @@ class ProceduralContent(ContentType):
    type:str = "PROCEDURAL"
    subclass: List[ProceduralSubclass]
-class ContentPrediction(BaseModel):
+class DefaultContentPrediction(BaseModel):
    """Class for a single class label prediction."""
    label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent]
@ -174,8 +174,53 @@ class CognitiveLayerSubgroup(BaseModel):
    description: str
-class CognitiveLayer(BaseModel):
+class DefaultCognitiveLayer(BaseModel):
    """Cognitive  layer"""
    category_name:str
    cognitive_layers: List[CognitiveLayerSubgroup] = Field(..., default_factory=list)
 class GraphDBType(Enum):
    NETWORKX = auto()
    NEO4J = auto()
 # Models for representing different entities
 class Relationship(BaseModel):
    type: str
    properties: Optional[Dict[str, Any]] = None
 class DocumentType(BaseModel):
    type_id: str
    description: str
    default_relationship: Relationship = Relationship(type='is_type')
 class Category(BaseModel):
    category_id: str
    name: str
    default_relationship: Relationship = Relationship(type='categorized_as')
 class Document(BaseModel):
    doc_id: str
    title: str
    summary: Optional[str] = None
    content_id: Optional[str] = None
    doc_type: Optional[DocumentType] = None
    categories: List[Category] = []
    default_relationship: Relationship = Relationship(type='has_document')
 class UserLocation(BaseModel):
    location_id: str
    description: str
    default_relationship: Relationship = Relationship(type='located_in')
 class UserProperties(BaseModel):
    custom_properties: Optional[Dict[str, Any]] = None
    location: Optional[UserLocation] = None
 class DefaultGraphModel(BaseModel):
    id: str
    user_properties: UserProperties = UserProperties()
    documents: List[Document] = []
    default_fields: Optional[Dict[str, Any]] = {}