diff --git a/README.md b/README.md
index bf48ee1fe..0831faa90 100644
--- a/README.md
+++ b/README.md
@@ -149,110 +149,8 @@ Run
-### Run the level 3
-
-Make sure you have Docker, Poetry, and Python 3.11 installed and postgres installed.
-
-Copy the .env.example to .env and fill in the variables
-
-
-Two ways to run the level 3:
-
-#### Docker:
-
-Copy the .env.template to .env and fill in the variables
-Specify the environment variable in the .env file to "docker"
-Launch the docker image:
-
-```docker compose up promethai_mem ```
-
-Send the request to the API:
-
-```
-curl -X POST -H "Content-Type: application/json" -d '{
-  "payload": {
-    "user_id": "97980cfea0067",
-    "data": [".data/3ZCCCW.pdf"],
-    "test_set": "sample",
-    "params": ["chunk_size"],
-    "metadata": "sample",
-    "retriever_type": "single_document_context"
-  }
-}' http://0.0.0.0:8000/rag-test/rag_test_run
-
-```
-Params:
-
-- data -> list of URLs or path to the file, located in the .data folder (pdf, docx, txt, html)
-- test_set -> sample, manual (list of questions and answers)
-- metadata -> sample, manual (json) or version (in progress)
-- params -> chunk_size, chunk_overlap, search_type (hybrid, bm25), embeddings
-- retriever_type -> llm_context, single_document_context, multi_document_context, cognitive_architecture(coming soon)
-
-Inspect the results in the DB:
-
-``` docker exec -it postgres psql -U bla ```
-
-``` \c bubu ```
-
-``` select * from test_outputs; ```
-
-Or set up the superset to visualize the results.
-The base SQL query is in the example_data folder.
-
-
-
-#### Poetry environment:
-
-
-Copy the .env.template to .env and fill in the variables
-Specify the environment variable in the .env file to "local"
-
-Use the poetry environment:
-
-``` poetry shell ```
-
-Change the .env file Environment variable to "local"
-
-Launch the postgres DB
-
-``` docker compose up postgres ```
-
-Launch the superset
-
-``` docker compose up superset ```
-
-Open the superset in your browser
-
-``` http://localhost:8088 ```
-Add the Postgres datasource to the Superset with the following connection string:
-
-``` postgres://bla:bla@postgres:5432/bubu ```
-
-Make sure to run to initialize DB tables
-
-``` python scripts/create_database.py ```
-
-After that, you can run the RAG test manager from your command line.
-
-
-```
- python rag_test_manager.py \
- --file ".data" \
- --test_set "example_data/test_set.json" \
- --user_id "97980cfea0067" \
- --params "chunk_size" "search_type" \
- --metadata "example_data/metadata.json" \
- --retriever_type "single_document_context"
-
-```
-
-Examples of metadata structure and test set are in the folder "example_data"
-
-
-
diff --git a/level_4/.env.template b/level_4/.env.template
index f3e423907..bfa1122b3 100644
--- a/level_4/.env.template
+++ b/level_4/.env.template
@@ -7,4 +7,8 @@ POSTGRES_PASSWORD = bla
 POSTGRES_DB = bubu
 POSTGRES_HOST = localhost
 POSTGRES_HOST_DOCKER = postgres
-SEGMENT_KEY = Etl4WJwzOkeDPAjaOXOMgyU16hO7mV7B
\ No newline at end of file
+SEGMENT_KEY = Etl4WJwzOkeDPAjaOXOMgyU16hO7mV7B
+COG_ARCH_DIR = cognitive_architecture
+GRAPH_DB_URL =
+GRAPH_DB_PW =
+GRAPH_DB_USER =
diff --git a/level_4/cognitive_architecture/classifiers/__init__.py b/level_4/cognitive_architecture/classifiers/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/level_4/cognitive_architecture/classifiers/classifier.py b/level_4/cognitive_architecture/classifiers/classifier.py
new file mode 100644
index 000000000..32a82132e
--- /dev/null
+++ b/level_4/cognitive_architecture/classifiers/classifier.py
@@ -0,0 +1,15 @@
+
+
+
+# TODO: add all classifiers here
+
+
+
+# classify retrievals according to type of retrieval
+def classify_retrieval():
+    pass
+
+
+# classify documents according to type of document
+def classify_document():
+    pass
diff --git a/level_4/cognitive_architecture/config.py b/level_4/cognitive_architecture/config.py
new file mode 100644
index 000000000..bd965330d
--- /dev/null
+++ b/level_4/cognitive_architecture/config.py
@@ -0,0 +1,84 @@
+import os
+import json
+import configparser
+import uuid
+from typing import Optional, List, Dict, Any
+from dataclasses import dataclass, field
+from pathlib import Path
+from dotenv import load_dotenv
+
+
+base_dir = Path(__file__).resolve().parent.parent
+# Load the .env file from the base directory
+dotenv_path = base_dir / '.env'
+load_dotenv(dotenv_path=dotenv_path)
+
+
+@dataclass
+class Config:
+    # Paths and Directories
+    memgpt_dir: str = field(default_factory=lambda: os.getenv('COG_ARCH_DIR', 'cognitive_architecture'))
+    config_path: str = field(default_factory=lambda: os.path.join(os.getenv('COG_ARCH_DIR', 'cognitive_architecture'), 'config'))
+
+    # Model parameters
+    model: str = 'gpt-4-1106-preview'
+    model_endpoint: str = 'openai'
+    openai_key: Optional[str] = os.getenv('OPENAI_API_KEY')
+
+    # Embedding parameters
+    embedding_model: str = 'openai'
+    embedding_dim: int = 1536
+    embedding_chunk_size: int = 300
+
+    # Database parameters (optional; unset when the env vars are missing)
+    graph_database_url: Optional[str] = os.getenv('GRAPH_DB_URL')
+    graph_database_username: Optional[str] = os.getenv('GRAPH_DB_USER')
+    graph_database_password: Optional[str] = os.getenv('GRAPH_DB_PW')
+
+    # Client ID
+    anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)
+
+    def load(self):
+        """Loads the configuration from a file or environment variables."""
+        config = configparser.ConfigParser()
+        config.read(self.config_path)
+
+        # Override with environment variables if they exist
+        for attr in self.__annotations__:
+            env_value = os.getenv(attr.upper())
+            if env_value is not None:
+                setattr(self, attr, env_value)
+
+        # Load from config file
+        if config.sections():
+            for section in config.sections():
+                for key, value in config.items(section):
+                    if hasattr(self, key):
+                        setattr(self, key, value)
+
+    def save(self):
+        """Saves the current configuration to a file."""
+        config = configparser.ConfigParser()
+
+        # Save the current settings to the config file
+        for attr, value in self.__dict__.items():
+            # Attributes without an underscore (e.g. 'model') cannot be split
+            # into a (section, option) pair, so file them under 'general'
+            if '_' in attr:
+                section, option = attr.split('_', 1)
+            else:
+                section, option = 'general', attr
+            if not config.has_section(section):
+                config.add_section(section)
+            config.set(section, option, str(value))
+
+        with open(self.config_path, 'w') as configfile:
+            config.write(configfile)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Returns a dictionary representation of the configuration."""
+        return {attr: getattr(self, attr) for attr in self.__annotations__}
+
+    @classmethod
+    def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":
+        """Creates a Config instance from a dictionary."""
+        config = cls()
+        for attr, value in config_dict.items():
+            if hasattr(config, attr):
+                setattr(config, attr, value)
+        return config
diff --git a/level_4/cognitive_architecture/database/database_crud.py b/level_4/cognitive_architecture/database/database_crud.py
index 65924bfae..5a9337f5f 100644
--- a/level_4/cognitive_architecture/database/database_crud.py
+++ b/level_4/cognitive_architecture/database/database_crud.py
@@ -23,4 +23,18 @@ async def session_scope(session):
 async def add_entity(session, entity):
     async with session_scope(session) as s:  # Use your async session_scope
         s.add(entity)  # No need to commit; session_scope takes care of it
-        return "Successfully added entity"
\ No newline at end of file
+        return "Successfully added entity"
+
+
+
+async def update_entity(session, model, entity_id, new_value):
+    async with session_scope(session) as s:
+        # Retrieve the entity from the database
+        entity = await s.get(model, entity_id)
+
+        if entity:
+            # Update the relevant column; 'updated_at' is refreshed automatically
+            entity.operation_status = new_value
+            return "Successfully updated entity"
+        else:
+            return "Entity not found"
\ No newline at end of file
diff --git a/level_4/cognitive_architecture/graph_database/__init__.py b/level_4/cognitive_architecture/graph_database/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/level_4/cognitive_architecture/graph_database/graph.py b/level_4/cognitive_architecture/graph_database/graph.py
new file mode 100644
index 000000000..15ed4f7df
--- /dev/null
+++ b/level_4/cognitive_architecture/graph_database/graph.py
@@ -0,0 +1,588 @@
+from pydantic import BaseModel
+from enum import Enum
+
+import typer
+import os
+import uuid
+# import marvin
+# from pydantic_settings import BaseSettings
+from langchain.chains import GraphCypherQAChain
+from langchain.chat_models import ChatOpenAI
+# from marvin import ai_classifier
+# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")
+
+print(os.getcwd())
+
+
+from ..models.sessions import Session
+from ..models.testset import TestSet
+from ..models.testoutput import TestOutput
+from ..models.metadatas import MetaDatas
+from ..models.operation import Operation
+from ..models.docs import DocsModel
+from ..models.memory import MemoryModel
+
+from pathlib import Path
+import networkx as nx
+
+from langchain.document_loaders import TextLoader
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.graphs import Neo4jGraph
+from langchain.text_splitter import TokenTextSplitter
+from langchain.vectorstores import Neo4jVector
+from dotenv import load_dotenv
+
+from graphviz import Digraph
+
+from ..database.database_crud import session_scope
+from ..database.database import AsyncSessionLocal
+
+import openai
+import instructor
+
+
+
+from abc import ABC, abstractmethod
+from typing import List
+
+# Adds response_model to ChatCompletion
+# Allows the return of Pydantic model rather than raw JSON
+instructor.patch()
+from pydantic import BaseModel, Field
+from ..utils import format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, create_node_variable_mapping
+DEFAULT_PRESET = "promethai_chat"
+preset_options = [DEFAULT_PRESET]
+import questionary
+PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
+load_dotenv()
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
+from ..config import Config
+
+config = Config()
+config.load()
+
+print(config.model)
+
+
+import logging
+
+# Execute Cypher queries to create the user and memory components if they don't exist
+#
+# graph.query(
+#     f"""
+#     // Ensure the User node exists
+#     MERGE (user:User {{ userId: {user} }})
+#
+#     // Ensure the SemanticMemory node exists
+#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
+#     MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
+#
+#     // Ensure the EpisodicMemory node exists
+#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
+#     MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
+#
+#     // Ensure the Buffer node exists
+#     MERGE (buffer:Buffer {{ userId: {user} }})
+#     MERGE (user)-[:HAS_BUFFER]->(buffer)
+#     """
+# )
+#
+# # Execute Cypher queries to create the cognitive components in the graph
+# graph.query(
+#     f"""
+#     // Parsing the query into components and linking them to the user and memory components
+#     MERGE (user:User {{ userId: {user} }})
+#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
+#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
+#     MERGE (buffer:Buffer {{ userId: {user} }})
+#
+#     CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
+#     CREATE (action2:Event {{ description: 'get information', source: 'book' }})
+#     CREATE (time:TimeContext {{ description: 'in the afternoon' }})
+#
+#     WITH user, semantic, episodic, buffer, action1, action2, time
+#     CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
+#     CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
+#     CREATE (episodic)-[:HAS_EVENT]->(action1)
+#     CREATE (episodic)-[:HAS_EVENT]->(action2)
+#     CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
+#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
+#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
+#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
+#     """
+# )
+
+
+class Node(BaseModel):
+    id: int
+    description: str
+    category: str
+    color: str = "blue"
+    memory_type: str
+
+
+class Edge(BaseModel):
+    source: int
+    target: int
+    description: str
+    color: str = "blue"
+
+
+class KnowledgeGraph(BaseModel):
+    nodes: List[Node] = Field(default_factory=list)
+    edges: List[Edge] = Field(default_factory=list)
+
+
+#
+
+def generate_graph(input) -> KnowledgeGraph:
+    return openai.ChatCompletion.create(
+        model="gpt-4-1106-preview",
+        messages=[
+            {
+                "role": "user",
+                "content": f"""Use the given format to extract information from the following input: {input}. """,
+
+            },
+            {"role": "system", "content": """You are a top-tier algorithm
+                designed for extracting information in structured formats to build a knowledge graph.
+                - **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
+                - The aim is to achieve simplicity and clarity in the
+                knowledge graph, making it accessible for a vast audience.
+                ## 2. Labeling Nodes
+                - **Consistency**: Ensure you use basic or elementary types for node labels.
+                - For example, when you identify an entity representing a person,
+                always label it as **"person"**.
+                Avoid using more specific terms like "mathematician" or "scientist".
+                - Include event, entity, time, or action nodes to the category.
+                - Classify the memory type as episodic or semantic.
+                - **Node IDs**: Never utilize integers as node IDs.
+                Node IDs should be names or human-readable identifiers found in the text.
+                ## 3. Handling Numerical Data and Dates
+                - Numerical data, like age or other related information,
+                should be incorporated as attributes or properties of the respective nodes.
+                - **No Separate Nodes for Dates/Numbers**:
+                Do not create separate nodes for dates or numerical values.
+                Always attach them as attributes or properties of nodes.
+                - **Property Format**: Properties must be in a key-value format.
+                - **Quotation Marks**: Never use escaped single or double quotes within property values.
+                - **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
+                ## 4. Coreference Resolution
+                - **Maintain Entity Consistency**:
+                When extracting entities, it's vital to ensure consistency.
+                If an entity, such as "John Doe", is mentioned multiple times
+                in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
+                always use the most complete identifier for that entity throughout the knowledge graph.
+                In this example, use "John Doe" as the entity ID.
+                Remember, the knowledge graph should be coherent and easily understandable,
+                so maintaining consistency in entity references is crucial.
+                ## 5. Strict Compliance
+                Adhere to the rules strictly. Non-compliance will result in termination."""}
+        ],
+        response_model=KnowledgeGraph,
+    )
+
+
+class AbstractGraphDB(ABC):
+
+    @abstractmethod
+    def query(self, query: str, params=None):
+        pass
+
+    # @abstractmethod
+    # def create_nodes(self, nodes: List[dict]):
+    #     pass
+    #
+    # @abstractmethod
+    # def create_edges(self, edges: List[dict]):
+    #     pass
+    #
+    # @abstractmethod
+    # def create_memory_type_relationships(self, nodes: List[dict], memory_type: str):
+    #     pass
+
+
+class Neo4jGraphDB(AbstractGraphDB):
+    def __init__(self, url, username, password):
+        # Initialize the Neo4j connection here
+        self.graph = Neo4jGraph(url=url, username=username, password=password)
+        self.openai_key = config.openai_key
+
+    def query(self, query, params=None):
+        return self.graph.query(query, params)
+
+    def create_base_cognitive_architecture(self, user_id: str):
+        # Create the user and memory components if they don't exist
+        user_memory_cypher = f"""
+        MERGE (user:User {{userId: '{user_id}'}})
+        MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
+        MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
+        MERGE (buffer:Buffer {{userId: '{user_id}'}})
+        MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
+        MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
+        MERGE (user)-[:HAS_BUFFER]->(buffer)
+        """
+
+        return user_memory_cypher
+
+    def user_query_to_edges_and_nodes(self, input: str) -> KnowledgeGraph:
+        return openai.ChatCompletion.create(
+            model=config.model,
+            messages=[
+                {
+                    "role": "user",
+                    "content": f"""Use the given format to extract information from the following input: {input}. """,
+
+                },
+                {"role": "system", "content": """You are a top-tier algorithm
+                    designed for extracting information in structured formats to build a knowledge graph.
+                    - **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
+                    - The aim is to achieve simplicity and clarity in the
+                    knowledge graph, making it accessible for a vast audience.
+                    ## 2. Labeling Nodes
+                    - **Consistency**: Ensure you use basic or elementary types for node labels.
+                    - For example, when you identify an entity representing a person,
+                    always label it as **"person"**.
+                    Avoid using more specific terms like "mathematician" or "scientist".
+                    - Include event, entity, time, or action nodes to the category.
+                    - Classify the memory type as episodic or semantic.
+                    - **Node IDs**: Never utilize integers as node IDs.
+                    Node IDs should be names or human-readable identifiers found in the text.
+                    ## 3. Handling Numerical Data and Dates
+                    - Numerical data, like age or other related information,
+                    should be incorporated as attributes or properties of the respective nodes.
+                    - **No Separate Nodes for Dates/Numbers**:
+                    Do not create separate nodes for dates or numerical values.
+                    Always attach them as attributes or properties of nodes.
+                    - **Property Format**: Properties must be in a key-value format.
+                    - **Quotation Marks**: Never use escaped single or double quotes within property values.
+                    - **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
+                    ## 4. Coreference Resolution
+                    - **Maintain Entity Consistency**:
+                    When extracting entities, it's vital to ensure consistency.
+                    If an entity, such as "John Doe", is mentioned multiple times
+                    in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
+                    always use the most complete identifier for that entity throughout the knowledge graph.
+                    In this example, use "John Doe" as the entity ID.
+                    Remember, the knowledge graph should be coherent and easily understandable,
+                    so maintaining consistency in entity references is crucial.
+                    ## 5. Strict Compliance
+                    Adhere to the rules strictly. Non-compliance will result in termination."""}
+            ],
+            response_model=KnowledgeGraph,
+        )
+
+    def generate_create_statements_for_nodes_with_uuid(self, nodes, unique_mapping, base_node_mapping):
+        create_statements = []
+        for node in nodes:
+            original_variable_name = base_node_mapping[node['id']]
+            unique_variable_name = unique_mapping[original_variable_name]
+            node_label = node['category'].capitalize()
+            properties = {k: v for k, v in node.items() if k not in ['id', 'category']}
+            try:
+                properties = format_dict(properties)
+            except Exception:
+                # Fall back to the raw dict representation if formatting fails
+                pass
+            create_statements.append(f"CREATE ({unique_variable_name}:{node_label} {properties})")
+        return create_statements
+
+    # Generate Cypher CREATE statements for edges with unique variable names
+    def generate_create_statements_for_edges_with_uuid(self, edges, unique_mapping, base_node_mapping):
+        create_statements = []
+        with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
+        create_statements.append(with_statement)
+
+        for edge in edges:
+            source_variable = unique_mapping[base_node_mapping[edge['source']]]
+            target_variable = unique_mapping[base_node_mapping[edge['target']]]
+            relationship = edge['description'].replace(" ", "_").upper()
+            create_statements.append(f"CREATE ({source_variable})-[:{relationship}]->({target_variable})")
+        return create_statements
+
+    def generate_memory_type_relationships_with_uuid_and_time_context(self, nodes, unique_mapping, base_node_mapping):
+        create_statements = []
+        with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
+        create_statements.append(with_statement)
+
+        # Loop through each node and create relationships based on memory_type
+        for node in nodes:
+            original_variable_name = base_node_mapping[node['id']]
+            unique_variable_name = unique_mapping[original_variable_name]
+            if node['memory_type'] == 'semantic':
+                create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_variable_name})")
+            elif node['memory_type'] == 'episodic':
+                create_statements.append(f"CREATE (episodic)-[:HAS_EVENT]->({unique_variable_name})")
+                if node['category'] == 'time':
+                    create_statements.append(f"CREATE (buffer)-[:HAS_TIME_CONTEXT]->({unique_variable_name})")
+
+            # Assuming buffer holds all actions and times
+            # if node['category'] in ['action', 'time']:
+            create_statements.append(f"CREATE (buffer)-[:CURRENTLY_HOLDING]->({unique_variable_name})")
+
+        return create_statements
+
+    def generate_cypher_query_for_user_prompt_decomposition(self, user_id):
+        # NOTE: decomposes a fixed example sentence for now; the user's actual
+        # prompt is not passed in yet
+        graph: KnowledgeGraph = generate_graph("I walked in the forest yesterday and added to my list I need to buy some milk in the store")
+        graph_dic = graph.dict()
+
+        node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
+        edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
+        # Create unique variable names for each node
+        unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
+        unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)
+        create_nodes_statements = self.generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping, node_variable_mapping)
+        create_edges_statements = self.generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping, node_variable_mapping)
+
+        memory_type_statements_with_uuid_and_time_context = self.generate_memory_type_relationships_with_uuid_and_time_context(
+            graph_dic['nodes'], unique_node_variable_mapping, node_variable_mapping)
+
+        # Combine all statements
+        cypher_statements = [self.create_base_cognitive_architecture(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
+        cypher_statements_joined = "\n".join(cypher_statements)
+        return cypher_statements_joined
+
+    def update_user_query_for_user_prompt_decomposition(self, user_id, user_query):
+        pass
+
+    def delete_all_user_memories(self, user_id):
+        try:
+            # Check if the user exists
+            user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
+            if not user_exists:
+                return f"No user found with ID: {user_id}"
+
+            # Delete all memory nodes for the given user; DETACH DELETE also
+            # removes the user's edges to them (deleting the relationships first,
+            # as the earlier version did, would orphan the memory nodes instead)
+            delete_query = f"""
+            MATCH (user:User {{userId: '{user_id}'}})
+            OPTIONAL MATCH (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
+            OPTIONAL MATCH (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
+            OPTIONAL MATCH (user)-[:HAS_BUFFER]->(buffer)
+            DETACH DELETE semantic, episodic, buffer
+            """
+            self.graph.query(delete_query)
+            return f"All memories deleted for user ID: {user_id}"
+        except Exception as e:
+            return f"An error occurred: {str(e)}"
+
+    def delete_specific_memory_type(self, user_id, memory_type):
+        try:
+            # Check if the user exists
+            user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
+            if not user_exists:
+                return f"No user found with ID: {user_id}"
+
+            # Validate the memory type and map it to the relationship name used by
+            # create_base_cognitive_architecture; a plain memory_type.upper() would
+            # yield HAS_SEMANTICMEMORY, which never matches HAS_SEMANTIC_MEMORY
+            relationship_map = {
+                'SemanticMemory': 'HAS_SEMANTIC_MEMORY',
+                'EpisodicMemory': 'HAS_EPISODIC_MEMORY',
+                'Buffer': 'HAS_BUFFER',
+            }
+            if memory_type not in relationship_map:
+                return "Invalid memory type. Choose from 'SemanticMemory', 'EpisodicMemory', or 'Buffer'."
+
+            # Delete specific memory type nodes and relationships for the given user
+            delete_query = f"""
+            MATCH (user:User {{userId: '{user_id}'}})-[:{relationship_map[memory_type]}]->(memory)
+            DETACH DELETE memory
+            """
+            self.graph.query(delete_query)
+            return f"{memory_type} deleted for user ID: {user_id}"
+        except Exception as e:
+            return f"An error occurred: {str(e)}"
+
+    def retrieve_semantic_memory(self, user_id: str):
+        query = """
+        MATCH (user:User {userId: $user_id})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)
+        MATCH (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
+        RETURN knowledge
+        """
+        return self.query(query, params={"user_id": user_id})
+
+    def retrieve_episodic_memory(self, user_id: str):
+        query = """
+        MATCH (user:User {userId: $user_id})-[:HAS_EPISODIC_MEMORY]->(episodic:EpisodicMemory)
+        MATCH (episodic)-[:HAS_EVENT]->(event)
+        RETURN event
+        """
+        return self.query(query, params={"user_id": user_id})
+
+    def retrieve_buffer_memory(self, user_id: str):
+        query = """
+        MATCH (user:User {userId: $user_id})-[:HAS_BUFFER]->(buffer:Buffer)
+        MATCH (buffer)-[:CURRENTLY_HOLDING]->(item)
+        RETURN item
+        """
+        return self.query(query, params={"user_id": user_id})
+
+    def generate_graph_semantic_memory_document_summary(self, document_summary: str, unique_graphdb_mapping_values: dict, document_namespace: str):
+        """ This function takes a document and generates a document summary in Semantic Memory"""
+        create_statements = []
+        with_statement = f"WITH {', '.join(unique_graphdb_mapping_values.values())}, user, semantic, episodic, buffer"
+        create_statements.append(with_statement)
+
+        # Loop through each node and create relationships based on memory_type
+        create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_graphdb_mapping_values})")
+
+        return create_statements
+
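+    # Usage sketch (illustrative values only, mirroring the example in main.py;
+    # not part of the class API): create the base architecture for a user, then
+    # exercise the retrieve/delete helpers defined above.
+    #
+    #   db = Neo4jGraphDB(url='bolt://localhost:7687', username='neo4j',
+    #                     password='pleaseletmein')
+    #   db.query(db.create_base_cognitive_architecture('user123'))
+    #   print(db.retrieve_semantic_memory('user123'))
+    #   print(db.delete_specific_memory_type('user123', 'Buffer'))
+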
+    def generate_document_summary(self, document_summary: str, unique_graphdb_mapping_values: dict, document_namespace: str):
+        """ This function takes a document and generates a document summary in Semantic Memory"""
+
+        # fetch namespace from postgres db
+        # fetch 1st and last page from vector store
+        # summarize the text, add document type
+        # write to postgres
+        create_statements = []
+        with_statement = f"WITH {', '.join(unique_graphdb_mapping_values.values())}, user, semantic, episodic, buffer"
+        create_statements.append(with_statement)
+
+        # Loop through each node and create relationships based on memory_type
+        create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_graphdb_mapping_values})")
+
+        return create_statements
+
+    def create_document_node_cypher(self, document_summary: dict, user_id: str) -> str:
+        """
+        Generate a Cypher query to create a Document node linked to a SemanticMemory node for a user.
+
+        Parameters:
+        - document_summary (dict): A dictionary containing the document's category, title, and summary.
+        - user_id (str): The unique identifier for the user.
+
+        Returns:
+        - str: A Cypher query string with parameters.
+
+        Raises:
+        - ValueError: If any required data is missing or invalid.
+        """
+
+        # Validate the input parameters
+        if not isinstance(document_summary, dict):
+            raise ValueError("The document_summary must be a dictionary.")
+        if not all(key in document_summary for key in ['document_category', 'title', 'summary']):
+            raise ValueError("The document_summary dictionary is missing required keys.")
+        if not isinstance(user_id, str) or not user_id:
+            raise ValueError("The user_id must be a non-empty string.")
+
+        # Escape single quotes in the document summary data (if not using parameters)
+        # title = document_summary['title'].replace("'", "\\'")
+        # summary = document_summary['summary'].replace("'", "\\'")
+        # document_category = document_summary['document_category'].replace("'", "\\'")
+
+        # Generate the Cypher query using parameters
+        cypher_query = f'''
+        // Ensure the User node exists
+        MERGE (user:User {{ userId: $user_id }})
+
+        // Ensure the SemanticMemory node exists and is connected to the User
+        MERGE (semantic:SemanticMemory {{ userId: $user_id }})
+        MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
+
+        // Create the Document node with its properties
+        CREATE (document:Document {{
+            title: $title,
+            summary: $summary,
+            documentCategory: $document_category
+        }})
+
+        // Link the Document node to the SemanticMemory node
+        CREATE (semantic)-[:HAS_DOCUMENT]->(document)
+        '''
+
+        return cypher_query
+
+    def update_document_node_with_namespace(self, user_id: str, vectordb_namespace: str):
+        # Generate the Cypher query
+        cypher_query = f'''
+        MATCH (user:User {{userId: $user_id}})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
+        SET document.vectordbNamespace = $vectordb_namespace
+        RETURN document
+        '''
+
+        # Parameters for the query
+        parameters = {
+            'user_id': user_id,
+            'vectordb_namespace': vectordb_namespace
+        }
+
+        # Execute the query with the provided parameters
+        result = self.query(cypher_query, parameters)
+
+        return result
+
+
+class NetworkXGraphDB(AbstractGraphDB):
+    def __init__(self):
+        self.graph = nx.Graph()
+        # Initialize other necessary properties or configurations
+
+    def query(self, query: str, params=None):
+        # Required because AbstractGraphDB declares query as abstract; without
+        # this stub the class cannot be instantiated. NetworkX has no query
+        # language, so this backend exposes direct methods instead.
+        raise NotImplementedError("NetworkXGraphDB does not support Cypher queries")
+
+    def create_base_cognitive_architecture(self, user_id: str):
+        # Add nodes for user and memory types if they don't exist
+        self.graph.add_node(user_id, type='User')
+        self.graph.add_node(f"{user_id}_semantic", type='SemanticMemory')
+        self.graph.add_node(f"{user_id}_episodic", type='EpisodicMemory')
+        self.graph.add_node(f"{user_id}_buffer", type='Buffer')
+
+        # Add edges to connect user to memory types
+        self.graph.add_edge(user_id, f"{user_id}_semantic", relation='HAS_SEMANTIC_MEMORY')
+        self.graph.add_edge(user_id, f"{user_id}_episodic", relation='HAS_EPISODIC_MEMORY')
+        self.graph.add_edge(user_id, f"{user_id}_buffer", relation='HAS_BUFFER')
+
+    def delete_all_user_memories(self, user_id: str):
+        # Remove nodes and edges related to the user's memories
+        for memory_type in ['semantic', 'episodic', 'buffer']:
+            memory_node = f"{user_id}_{memory_type}"
+            # Guard against missing nodes; remove_node raises otherwise
+            if memory_node in self.graph:
+                self.graph.remove_node(memory_node)
+
+    def delete_specific_memory_type(self, user_id: str, memory_type: str):
+        # Remove a specific type of memory node and its related edges
+        memory_node = f"{user_id}_{memory_type.lower()}"
+        if memory_node in self.graph:
+            self.graph.remove_node(memory_node)
+
+    # Methods for retrieving semantic, episodic, and buffer memories
+    def retrieve_semantic_memory(self, user_id: str):
+        return [n for n in self.graph.neighbors(f"{user_id}_semantic")]
+
+    def retrieve_episodic_memory(self, user_id: str):
+        return [n for n in self.graph.neighbors(f"{user_id}_episodic")]
+
+    def retrieve_buffer_memory(self, user_id: str):
+        return [n for n in self.graph.neighbors(f"{user_id}_buffer")]
+
+
+class GraphDBFactory:
+    def create_graph_db(self, db_type, **kwargs):
+        if db_type == 'neo4j':
+            return Neo4jGraphDB(**kwargs)
+        elif db_type == 'networkx':
+            # NetworkXGraphDB takes no connection arguments
+            return NetworkXGraphDB()
+        else:
+            raise ValueError(f"Unsupported database type: {db_type}")
diff --git a/level_4/cognitive_architecture/models/docs.py b/level_4/cognitive_architecture/models/docs.py
index 38166956b..9890d8600 100644
--- a/level_4/cognitive_architecture/models/docs.py
+++ b/level_4/cognitive_architecture/models/docs.py
@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 
 class DocsModel(Base):
     __tablename__ = 'docs'
diff --git a/level_4/cognitive_architecture/models/memory.py b/level_4/cognitive_architecture/models/memory.py
index e19ecddc4..3be319f11 100644
--- a/level_4/cognitive_architecture/models/memory.py
+++ b/level_4/cognitive_architecture/models/memory.py
@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 
 class MemoryModel(Base):
     __tablename__ = 'memories'
diff --git a/level_4/cognitive_architecture/models/metadatas.py b/level_4/cognitive_architecture/models/metadatas.py
index 7d2716cba..8ef53184a 100644
--- a/level_4/cognitive_architecture/models/metadatas.py
+++ b/level_4/cognitive_architecture/models/metadatas.py
@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 
 
 class MetaDatas(Base):
diff --git a/level_4/cognitive_architecture/models/operation.py b/level_4/cognitive_architecture/models/operation.py
index 1d06657d9..a4350a95f 100644
--- a/level_4/cognitive_architecture/models/operation.py
+++ b/level_4/cognitive_architecture/models/operation.py
@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 
 
 class Operation(Base):
diff --git a/level_4/cognitive_architecture/models/sessions.py b/level_4/cognitive_architecture/models/sessions.py
index 41110d52d..a805b50a9 100644
--- a/level_4/cognitive_architecture/models/sessions.py
+++ b/level_4/cognitive_architecture/models/sessions.py
@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 
 
 class Session(Base):
diff --git a/level_4/cognitive_architecture/models/testoutput.py b/level_4/cognitive_architecture/models/testoutput.py
index 4731a6e46..5a886d098 100644
--- a/level_4/cognitive_architecture/models/testoutput.py
+++ b/level_4/cognitive_architecture/models/testoutput.py
@@ -14,7 +14,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 
 
 class TestOutput(Base):
diff --git a/level_4/cognitive_architecture/models/testset.py b/level_4/cognitive_architecture/models/testset.py
index b9f17a192..e2df0bdb2 100644
--- a/level_4/cognitive_architecture/models/testset.py
+++ b/level_4/cognitive_architecture/models/testset.py
@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 
 
 class TestSet(Base):
diff --git a/level_4/cognitive_architecture/models/user.py b/level_4/cognitive_architecture/models/user.py
index d9514c3cf..79c66b6dc 100644
--- a/level_4/cognitive_architecture/models/user.py
+++ b/level_4/cognitive_architecture/models/user.py
@@ -6,7 +6,7 @@ from sqlalchemy.ext.declarative import declarative_base
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 
 
 class User(Base):
diff --git a/level_4/cognitive_architecture/presets.py b/level_4/cognitive_architecture/presets.py
new file mode 100644
index 000000000..d23276576
--- /dev/null
+++ b/level_4/cognitive_architecture/presets.py
@@ -0,0 +1,10 @@
+
+DEFAULT_PRESET = "cognitive_architecture_chat"
+preset_options = [DEFAULT_PRESET]
+
+
+
+def use_preset():
+    """Placeholder for different preset options"""
+
+    pass
diff --git a/level_4/cognitive_architecture/shared/__init__.py b/level_4/cognitive_architecture/shared/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/level_4/cognitive_architecture/shared/chunk_strategy.py b/level_4/cognitive_architecture/shared/chunk_strategy.py
new file mode 100644
index 000000000..c06f3278e
--- /dev/null
+++ b/level_4/cognitive_architecture/shared/chunk_strategy.py
@@ -0,0 +1,7 @@
+from enum import Enum
+
+class ChunkStrategy(Enum):
+    EXACT = 'exact'
+    PARAGRAPH = 'paragraph'
+    SENTENCE = 'sentence'
+    VANILLA = 'vanilla'
diff --git a/level_4/cognitive_architecture/utils.py b/level_4/cognitive_architecture/utils.py
new file mode 100644
index 000000000..2eec84281
--- /dev/null
+++ b/level_4/cognitive_architecture/utils.py
@@ -0,0 +1,79 @@
+import uuid
+
+from graphviz import Digraph
+
+# from graph_database.graph import KnowledgeGraph
+
+
+class Node:
+    def __init__(self, id, description, color):
+        self.id = id
+        self.description = description
+        self.color = color
+
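+# NOTE: Node and Edge here are plain holders kept for the commented-out
+# graphviz rendering below; pydantic models of the same name live in
+# graph_database/graph.py.
+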
+class Edge:
+    def __init__(self, source, target, label, color):
+        self.source = source
+        self.target = target
+        self.label = label
+        self.color = color
+
+# def visualize_knowledge_graph(kg: KnowledgeGraph):
+#     dot = Digraph(comment="Knowledge Graph")
+#
+#     # Add nodes
+#     for node in kg.nodes:
+#         dot.node(str(node.id), node.description, color=node.color)
+#
+#     # Add edges
+#     for edge in kg.edges:
+#         dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)
+#
+#     # Render the graph
+#     dot.render("knowledge_graph.gv", view=True)
+#
+#
+
+
+def format_dict(d):
+    # Initialize an empty list to store formatted items
+    formatted_items = []
+
+    # Iterate through all key-value pairs
+    for key, value in d.items():
+        # Format key-value pairs with a colon and space, adding quotes for string values
+        formatted_item = f"{key}: '{value}'" if isinstance(value, str) else f"{key}: {value}"
+        formatted_items.append(formatted_item)
+
+    # Join all formatted items with a comma and a space
+    formatted_string = ", ".join(formatted_items)
+
+    # Add curly braces to mimic a dictionary
+    formatted_string = f"{{{formatted_string}}}"
+
+    return formatted_string
+
+
+def append_uuid_to_variable_names(variable_mapping):
+    unique_variable_mapping = {}
+    for original_name in variable_mapping.values():
+        unique_name = f"{original_name}_{uuid.uuid4().hex}"
+        unique_variable_mapping[original_name] = unique_name
+    return unique_variable_mapping
+
+
+# Create variable name mappings for nodes and edges, used by the Cypher generators
+def create_node_variable_mapping(nodes):
+    mapping = {}
+    for node in nodes:
+        variable_name = f"{node['category']}{node['id']}".lower()
+        mapping[node['id']] = variable_name
+    return mapping
+
+
+def create_edge_variable_mapping(edges):
+    mapping = {}
+    for edge in edges:
+        # Construct a unique identifier for the edge
+        variable_name = f"edge{edge['source']}to{edge['target']}".lower()
+        mapping[(edge['source'], edge['target'])] = variable_name
+    return mapping
\ No newline at end of file
diff --git a/level_4/cognitive_architecture/vectordb/basevectordb.py b/level_4/cognitive_architecture/vectordb/basevectordb.py
index 81f4f7618..d28da5b50 100644
--- a/level_4/cognitive_architecture/vectordb/basevectordb.py
+++ b/level_4/cognitive_architecture/vectordb/basevectordb.py
@@ -4,7 +4,7 @@ from io import BytesIO
 import os, sys
 # Add the parent directory to sys.path
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB
+from ..vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB
 import sqlalchemy as sa
 logging.basicConfig(level=logging.INFO)
 import marvin
@@ -13,13 +13,13 @@ from dotenv import load_dotenv
 from langchain.document_loaders import PyPDFLoader
 from langchain.retrievers import WeaviateHybridSearchRetriever
 from weaviate.gql.get import HybridFusion
-from models.sessions import Session
-from models.testset import TestSet
-from models.testoutput import TestOutput
-from models.metadatas import MetaDatas
-from models.operation import Operation
+from ..models.sessions import Session
+from ..models.testset import TestSet
+from ..models.testoutput import TestOutput
+from ..models.metadatas import MetaDatas
+from ..models.operation import Operation
 from sqlalchemy.orm import sessionmaker
-from database.database import engine
+from ..database.database import engine
 load_dotenv()
 from typing import Optional
 import time
diff --git a/level_4/cognitive_architecture/vectordb/chunkers/chunkers.py b/level_4/cognitive_architecture/vectordb/chunkers/chunkers.py
index 225b72e87..a7bb8c943 100644
--- a/level_4/cognitive_architecture/vectordb/chunkers/chunkers.py
+++ b/level_4/cognitive_architecture/vectordb/chunkers/chunkers.py
@@ -1,7 +1,7 @@
 from langchain.document_loaders import PyPDFLoader
 import sys, os
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from shared.chunk_strategy import ChunkStrategy
+from ..shared.chunk_strategy import ChunkStrategy
 import re
 
 def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_overlap=None):
diff --git a/level_4/main.py b/level_4/main.py
index 6701dde32..facf9b5c7 100644
--- a/level_4/main.py
+++ b/level_4/main.py
@@ -9,23 +9,14 @@ from langchain.chains import GraphCypherQAChain
 from langchain.chat_models import ChatOpenAI
 # from marvin import ai_classifier
 # marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")
-DEFAULT_PRESET = "promethai_chat"
-preset_options = [DEFAULT_PRESET]
-import questionary
-PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
-
-
-
-def create_config_dir():
-    if not os.path.exists(PROMETHAI_DIR):
-        os.makedirs(PROMETHAI_DIR, exist_ok=True)
-
-    folders = ["personas", "humans", "archival", "agents"]
-    for folder in folders:
-        if not os.path.exists(os.path.join(PROMETHAI_DIR, folder)):
-            os.makedirs(os.path.join(PROMETHAI_DIR, folder))
-
+from cognitive_architecture.models.sessions import Session
+from cognitive_architecture.models.testset import TestSet
+from cognitive_architecture.models.testoutput import TestOutput
+from cognitive_architecture.models.metadatas import MetaDatas
+from cognitive_architecture.models.operation import Operation
+from cognitive_architecture.models.docs import DocsModel
+from cognitive_architecture.models.memory import MemoryModel
 
 from pathlib import Path
@@ -40,14 +31,8 @@ import uuid
 
 from graphviz import Digraph
 
-
-load_dotenv()
-
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
-
-txt_path = "dune.txt"
-
-
+from cognitive_architecture.database.database_crud import session_scope
+from cognitive_architecture.database.database import AsyncSessionLocal
 import openai
 import instructor
@@ -57,333 +42,188 @@ import instructor
 instructor.patch()
 from pydantic import BaseModel, Field
 from typing import List
+DEFAULT_PRESET = "promethai_chat"
+preset_options = [DEFAULT_PRESET]
+import questionary
+PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
+load_dotenv()
 
-class Node(BaseModel):
-    id: int
-    description: str
-    category: str
-    color: str ="blue"
-    memory_type: str
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
+from cognitive_architecture.config import Config
+
+config = Config()
+config.load()
+
+print(config.model)
 
-
-class Edge(BaseModel):
-    source: int
-    target: int
-    description: str
-    color: str= "blue"
-
-
-class KnowledgeGraph(BaseModel):
-    nodes: List[Node] = Field(..., default_factory=list)
-    edges: List[Edge] = Field(..., default_factory=list)
-
-
-#
-
-def generate_graph(input) -> KnowledgeGraph:
-    return openai.ChatCompletion.create(
-        model="gpt-4-1106-preview",
-        messages=[
-            {
-                "role": "user",
-                "content": f"""Use the given format to extract information from the following input: {input}. """,
-
-            },
-            { "role":"system", "content": """You are a top-tier algorithm
-                designed for extracting information in structured formats to build a knowledge graph.
-                - **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
-                - The aim is to achieve simplicity and clarity in the
-                knowledge graph, making it accessible for a vast audience.
-                ## 2. Labeling Nodes
-                - **Consistency**: Ensure you use basic or elementary types for node labels.
-                - For example, when you identify an entity representing a person,
-                always label it as **"person"**.
-                Avoid using more specific terms like "mathematician" or "scientist".
-                - Include event, entity, time, or action nodes to the category.
-                - Classify the memory type as episodic or semantic.
-                - **Node IDs**: Never utilize integers as node IDs.
-                Node IDs should be names or human-readable identifiers found in the text.
-                ## 3. Handling Numerical Data and Dates
-                - Numerical data, like age or other related information,
-                should be incorporated as attributes or properties of the respective nodes.
-                - **No Separate Nodes for Dates/Numbers**:
-                Do not create separate nodes for dates or numerical values.
-                Always attach them as attributes or properties of nodes.
-                - **Property Format**: Properties must be in a key-value format.
-                - **Quotation Marks**: Never use escaped single or double quotes within property values.
-                - **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
-                ## 4. Coreference Resolution
-                - **Maintain Entity Consistency**:
-                When extracting entities, it's vital to ensure consistency.
-                If an entity, such as "John Doe", is mentioned multiple times
-                in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
-                always use the most complete identifier for that entity throughout the knowledge graph.
-                In this example, use "John Doe" as the entity ID.
-                Remember, the knowledge graph should be coherent and easily understandable,
-                so maintaining consistency in entity references is crucial.
-                ## 5. Strict Compliance
-                Adhere to the rules strictly. Non-compliance will result in termination."""}
-        ],
-        response_model=KnowledgeGraph,
-    )
+import logging
 
-def execute_cypher_query(query: str):
-    graph_ = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="pleaseletmein")
-    graph_.query(query)
-    # This is a placeholder for the logic that will execute the Cypher query
-    # You would replace this with the actual logic to run the query in your Neo4j database
-    print(query)
-
-#Execute Cypher queries to create the user and memory components if they don't exist
-#
-# graph.query(
-#     f"""
-#     // Ensure the User node exists
-#     MERGE (user:User {{ userId: {user} }})
-#
-#     // Ensure the SemanticMemory node exists
-#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
-#     MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
-#
-#     // Ensure the EpisodicMemory node exists
-#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
-#     MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
-#
-#     // Ensure the Buffer node exists
-#     MERGE (buffer:Buffer {{ userId: {user} }})
-#     MERGE (user)-[:HAS_BUFFER]->(buffer)
-#     """
-# )
-#
-# # Execute Cypher queries to create the cognitive components in the graph
-# graph.query(
-#     f"""
-#     // Parsing the query into components and linking them to the user and memory components
-#     MERGE (user:User {{ userId: {user} }})
-#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
-#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
-#     MERGE (buffer:Buffer {{ userId: {user} }})
-#
-#     CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
-#     CREATE (action2:Event {{ description: 'get information', source: 'book' }})
-#     CREATE (time:TimeContext {{ description: 'in the afternoon' }})
-#
-#     WITH user, semantic, episodic, buffer, action1, action2, time
-#     CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
-#     CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
-#     CREATE (episodic)-[:HAS_EVENT]->(action1)
-#     CREATE (episodic)-[:HAS_EVENT]->(action2)
-#     CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
-#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
-#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
-#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
-#     """
-# )
+import asyncio
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
 
-class Node:
-    def __init__(self, id, description, color):
-        self.id = id
-        self.description = description
-        self.color = color
+async def get_vectordb_namespace(session: AsyncSession, user_id: str):
+    try:
+        result = await session.execute(
+            select(MemoryModel.id).where(MemoryModel.user_id == user_id).order_by(MemoryModel.created_at.desc()).limit(1)
+        )
+        namespace = result.scalar_one_or_none()
+        return namespace
+    except Exception as e:
+        logging.error(f"An error occurred while retrieving the Vectordb_namespace: {str(e)}")
+        return None
 
-class Edge:
-    def __init__(self, source, target, label, color):
-        self.source = source
-        self.target = target
-        self.label = label
-        self.color = color
 
-def visualize_knowledge_graph(kg: KnowledgeGraph):
-    dot = Digraph(comment="Knowledge Graph")
-
-    # Add nodes
-    for node in kg.nodes:
-        dot.node(str(node.id), node.description, color=node.color)
-
-    # Add edges
-    for edge in kg.edges:
-        dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)
-
-    # Render the graph
-    dot.render("knowledge_graph.gv", view=True)
+# async def retrieve_job_by_id(session, user_id, job_id):
+#     try:
+#         result = await session.execute(
+#             session.query(Session.id)
+#             .filter_by(user_id=user_id, id=job_id)
+#             .order_by(Session.created_at)
+#         )
+#         return result.scalar_one_or_none()
+#     except Exception as e:
+#         logging.error(f"An error occurred while retrieving the job: {str(e)}")
+#         return None
 
-def create_base_queries_from_user( user_id: str):
-    # Create the user and memory components if they don't exist
-    user_memory_cypher = f"""
-    MERGE (user:User {{userId: '{user_id}'}})
-    MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
-    MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
-    MERGE (buffer:Buffer {{userId: '{user_id}'}})
-    MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
-    MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
-    MERGE (user)-[:HAS_BUFFER]->(buffer)
+
+
+async def update_document_vectordb_namespace(postgres_session: AsyncSession, user_id: str, namespace: str = None):
     """
-    return user_memory_cypher
+    Update the Document node with the Vectordb_namespace for the given user. If the namespace is not provided,
+    it will be retrieved from the PostgreSQL database.
 
-# Function to append a UUID4 to the variable names to ensure uniqueness
-def append_uuid_to_variable_names(variable_mapping):
-    unique_variable_mapping = {}
-    for original_name in variable_mapping.values():
-        unique_name = f"{original_name}_{uuid.uuid4().hex}"
-        unique_variable_mapping[original_name] = unique_name
-    return unique_variable_mapping
+    Args:
+        postgres_session (AsyncSession): The async session for connecting to the PostgreSQL database.
+        user_id (str): The user's unique identifier.
+        namespace (str, optional): The Vectordb_namespace. If None, it will be retrieved from the database.
 
-# Update the functions to use the unique variable names
-def create_node_variable_mapping(nodes):
-    mapping = {}
-    for node in nodes:
-        variable_name = f"{node['category']}{node['id']}".lower()
-        mapping[node['id']] = variable_name
-    return mapping
+    Returns:
+        The result of the update operation or None if an error occurred.
+    """
+    vectordb_namespace = namespace
 
-def create_edge_variable_mapping(edges):
-    mapping = {}
-    for edge in edges:
-        # Construct a unique identifier for the edge
-        variable_name = f"edge{edge['source']}to{edge['target']}".lower()
-        mapping[(edge['source'], edge['target'])] = variable_name
-    return mapping
+    # Retrieve namespace from the database if not provided
+    if vectordb_namespace is None:
+        vectordb_namespace = await get_vectordb_namespace(postgres_session, user_id)
+        if not vectordb_namespace:
+            logging.error("Vectordb_namespace could not be retrieved.")
+            return None
-
-# Update the function to generate Cypher CREATE statements for nodes with unique variable names
-
-
-def format_dict(d):
-    # Initialize an empty list to store formatted items
-    formatted_items = []
-
-    # Iterate through all key-value pairs
-    for key, value in d.items():
-        # Format key-value pairs with a colon and space, and adding quotes for string values
-        formatted_item = f"{key}: '{value}'" if isinstance(value, str) else f"{key}: {value}"
-        formatted_items.append(formatted_item)
-
-    # Join all formatted items with a comma and a space
-    formatted_string = ", ".join(formatted_items)
-
-    # Add curly braces to mimic a dictionary
-    formatted_string = f"{{{formatted_string}}}"
-
-    return formatted_string
-def generate_create_statements_for_nodes_with_uuid(nodes, unique_mapping):
-    create_statements = []
-    for node in nodes:
-        original_variable_name = node_variable_mapping[node['id']]
-        unique_variable_name = unique_mapping[original_variable_name]
-        node_label = node['category'].capitalize()
-        properties = {k: v for k, v in node.items() if k not in ['id', 'category']}
-        try:
-            properties = format_dict(properties)
-        except:
-            pass
-        create_statements.append(f"CREATE ({unique_variable_name}:{node_label} {properties})")
-    return create_statements
-
-# Update the function to generate Cypher CREATE statements for edges with unique variable names
-def generate_create_statements_for_edges_with_uuid(edges, unique_mapping):
-    create_statements = []
-    with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
-    create_statements.append(with_statement)
-
-    for edge in edges:
-        # print("HERE IS THE EDGE", edge)
-        source_variable = unique_mapping[node_variable_mapping[edge['source']]]
-        target_variable = unique_mapping[node_variable_mapping[edge['target']]]
-        relationship = edge['description'].replace(" ", "_").upper()
-        create_statements.append(f"CREATE ({source_variable})-[:{relationship}]->({target_variable})")
-    return create_statements
-
-
-# Update the function to generate Cypher CREATE statements for memory type relationships with unique variable names
-def generate_memory_type_relationships_with_uuid_and_time_context(nodes, unique_mapping):
-    create_statements = []
-    with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
-    create_statements.append(with_statement)
-
-    # Loop through each node and create relationships based on memory_type
-    for node in nodes:
-        original_variable_name = node_variable_mapping[node['id']]
-        unique_variable_name = unique_mapping[original_variable_name]
-        if node['memory_type'] == 'semantic':
-            create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_variable_name})")
-        elif node['memory_type'] == 'episodic':
-            create_statements.append(f"CREATE (episodic)-[:HAS_EVENT]->({unique_variable_name})")
-            if node['category'] == 'time':
-                create_statements.append(f"CREATE (buffer)-[:HAS_TIME_CONTEXT]->({unique_variable_name})")
-
-        # Assuming buffer holds all actions and times
-        # if node['category'] in ['action', 'time']:
-        create_statements.append(f"CREATE (buffer)-[:CURRENTLY_HOLDING]->({unique_variable_name})")
-
-    return create_statements
+    # Update the Document node in Neo4j with the namespace
+    # (update_document_node_with_namespace is a Neo4jGraphDB method, so an
+    # instance is required here; connection details come from Config)
+    from cognitive_architecture.graph_database.graph import Neo4jGraphDB
+    neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url,
+                                  username=config.graph_database_username,
+                                  password=config.graph_database_password)
+    update_result = neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace)
+    return update_result
 
-# Main execution logic
-if __name__ == "__main__":
-    user_id = "User1"
-    query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"
-    # Generate the knowledge graph from the user input
-    knowledge_graph = generate_graph(query_input)
-    visualize_knowledge_graph(knowledge_graph)
-    # out = knowledge_graph.dict()
-    # print(out)
+    # query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"
     #
-    graph: KnowledgeGraph = generate_graph("I walked in the forest yesterday and added to my list I need to buy some milk in the store")
-    graph_dic = graph.dict()
-
-    node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
-    edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
-    # Create unique variable names for each node
-    unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
-    unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)
-    create_nodes_statements = generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping)
-    create_edges_statements = generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping)
-
-    memory_type_statements_with_uuid_and_time_context = generate_memory_type_relationships_with_uuid_and_time_context(
-        graph_dic['nodes'], unique_node_variable_mapping)
-
-    # # Combine all statements
-    cypher_statements = [create_base_queries_from_user(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
-    cypher_statements_joined = "\n".join(cypher_statements)
-
-
-
-    execute_cypher_query(cypher_statements_joined)
-
-
-
-    # Translate the KnowledgeGraph into Cypher queries
-
-
-
-    # Make document summary in Semantic Memory
-    # Document summary links to a Namespace in Vector Store
-    # Categorize document types in Semantic Memory
-    # Make a spine classifier that retrieves the relevant document namespaces from Vector Store
+    # # Generate the knowledge graph from the user input
+    # knowledge_graph = generate_graph(query_input)
+    # visualize_knowledge_graph(knowledge_graph)
+    # # out = knowledge_graph.dict()
+    # # print(out)
+    # #
+    # graph: KnowledgeGraph = generate_graph("I walked in the forest yesterday and added to my list I need to buy some milk in the store")
+    # graph_dic = graph.dict()
     #
-    # Connect document summary to chunks in Weaviate vector store
+    # node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
+    # edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
+    # # Create unique variable names for each node
+    # unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
+    # unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)
+    # create_nodes_statements = generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping)
+    # create_edges_statements = generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping)
+    #
+    # memory_type_statements_with_uuid_and_time_context = generate_memory_type_relationships_with_uuid_and_time_context(
+    #     graph_dic['nodes'], unique_node_variable_mapping)
+    #
+    # # # Combine all statements
+    # cypher_statements = [create_base_queries_from_user(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
+    # cypher_statements_joined = "\n".join(cypher_statements)
+    #
+    #
+    #
+    # execute_cypher_query(cypher_statements_joined)
 
-
-
-    # print(cypher_query)
-# #
-# # # Execute the Cypher queries to create the graph in Neo4j
-# execute_cypher_query(cypher_query)
-# # Refresh the graph schema
-# graph.refresh_schema()
+    # bartleby_summary = {
+    #     "document_category": "Classic Literature",
+    #     "title": "Bartleby, the Scrivener",
+    #     "summary": (
+    #         "Bartleby, the Scrivener: A Story of Wall Street' is a short story by Herman Melville "
+    #         "that tells the tale of Bartleby, a scrivener, or copyist, who works for a Manhattan "
+    #         "lawyer. Initially, Bartleby is a competent and industrious worker. However, one day, "
+    #         "when asked to proofread a document, he responds with what becomes his constant refrain "
+    #         "to any request: 'I would prefer not to.' As the story progresses, Bartleby becomes "
+    #         "increasingly passive, refusing not just work but also food and eventually life itself, "
+    #         "as he spirals into a state of passive resistance. The lawyer, the narrator of the story, "
+    #         "is both fascinated and frustrated by Bartleby's behavior. Despite attempts to understand "
+    #         "and help him, Bartleby remains an enigmatic figure, his motives and thoughts unexplained. "
+    #         "He is eventually evicted from the office and later found dead in a prison yard, having "
+    #         "preferred not to live. The story is a meditation on the themes of isolation, societal "
+    #         "obligation, and the inexplicable nature of human behavior."
+    #     )
+    # }
+    # rs = create_document_node_cypher(bartleby_summary, user_id)
+    #
+    # parameters = {
+    #     'user_id': user_id,
+    #     'title': bartleby_summary['title'],
+    #     'summary': bartleby_summary['summary'],
+    #     'document_category': bartleby_summary['document_category']
+    # }
+    #
+    # execute_cypher_query(rs, parameters)
 #
-# # Print the schema to the console
-# print(graph.schema)
+# async def main():
+#     user_id = "User1"
+#
+#     async with session_scope(AsyncSessionLocal()) as session:
+#         await update_document_vectordb_namespace(session, user_id)
+#
+#     # print(rs)
+#
+# if __name__ == "__main__":
+#     import asyncio
+#
+#     asyncio.run(main())
+#
+# # config = Config()
+# # config.load()
+# #
+# # print(config.model)
+# # print(config.openai_key)
+
+
+
+async def main():
+    from cognitive_architecture.graph_database.graph import Neo4jGraphDB
+    # Example initialization (replace with your actual connection details)
+    neo4j_graph_db = Neo4jGraphDB(url='bolt://localhost:7687', username='neo4j', password='pleaseletmein')
+    # Generate the Cypher query for a specific user
+    user_id = 'user123'  # Replace with the actual user ID
+    cypher_query = neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id)
+    # Execute the generated Cypher query and show what came back
+    result = neo4j_graph_db.query(cypher_query)
+    print(result)
+
+    # async with session_scope(AsyncSessionLocal()) as session:
+    #     await update_document_vectordb_namespace(session, user_id)
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
\ No newline at end of file
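
Taken together, the pieces this diff adds compose as follows; a minimal sketch, assuming a reachable Neo4j instance and the illustrative credentials already used in `main.py` (the `user123` ID is likewise just an example):

```python
# Minimal usage sketch for the new cognitive_architecture pieces (assumes a
# running Neo4j instance; URL/credentials are the example values from main.py).
from cognitive_architecture.config import Config
from cognitive_architecture.graph_database.graph import GraphDBFactory

config = Config()
config.load()

# The factory returns a Neo4jGraphDB; passing 'networkx' would return the
# in-memory backend instead.
db = GraphDBFactory().create_graph_db(
    'neo4j',
    url=config.graph_database_url or 'bolt://localhost:7687',
    username=config.graph_database_username or 'neo4j',
    password=config.graph_database_password or 'pleaseletmein',
)

user_id = 'user123'  # example ID, as in main.py
# Ensure the User/SemanticMemory/EpisodicMemory/Buffer nodes exist...
db.query(db.create_base_cognitive_architecture(user_id))
# ...then decompose the (currently hardcoded) example prompt into graph nodes.
db.query(db.generate_cypher_query_for_user_prompt_decomposition(user_id))
print(db.retrieve_episodic_memory(user_id))
```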