Added dynamic graph creation

This commit is contained in:
Vasilije 2023-11-10 18:24:31 +01:00
parent 0b6f9b0dca
commit ceba0d31e7
23 changed files with 984 additions and 445 deletions

102
README.md
View file

@ -149,110 +149,8 @@ Run
### Run the level 3
Make sure you have Docker, Poetry, Python 3.11, and Postgres installed.
Copy the .env.example to .env and fill in the variables.
There are two ways to run the level 3:
#### Docker:
Copy the .env.template to .env and fill in the variables.
Set the Environment variable in the .env file to "docker".
Launch the docker image:
```docker compose up promethai_mem ```
Send the request to the API:
```
curl -X POST -H "Content-Type: application/json" -d '{
"payload": {
"user_id": "97980cfea0067",
"data": [".data/3ZCCCW.pdf"],
"test_set": "sample",
"params": ["chunk_size"],
"metadata": "sample",
"retriever_type": "single_document_context"
}
}' http://0.0.0.0:8000/rag-test/rag_test_run
```
Params:
- data -> list of URLs or path to the file, located in the .data folder (pdf, docx, txt, html)
- test_set -> sample, manual (list of questions and answers)
- metadata -> sample, manual (json) or version (in progress)
- params -> chunk_size, chunk_overlap, search_type (hybrid, bm25), embeddings
- retriever_type -> llm_context, single_document_context, multi_document_context, cognitive_architecture (coming soon)
Inspect the results in the DB:
``` docker exec -it postgres psql -U bla ```
``` \c bubu ```
``` select * from test_outputs; ```
Or set up Superset to visualize the results.
The base SQL query is in the example_data folder.
#### Poetry environment:
Copy the .env.template to .env and fill in the variables.
Set the Environment variable in the .env file to "local".
Activate the poetry environment:
``` poetry shell ```
Launch the Postgres DB:
``` docker compose up postgres ```
Launch Superset:
``` docker compose up superset ```
Open Superset in your browser:
``` http://localhost:8088 ```
Add the Postgres datasource to Superset with the following connection string:
``` postgres://bla:bla@postgres:5432/bubu ```
Run the following script to initialize the DB tables:
``` python scripts/create_database.py ```
After that, you can run the RAG test manager from your command line.
```
python rag_test_manager.py \
--file ".data" \
--test_set "example_data/test_set.json" \
--user_id "97980cfea0067" \
--params "chunk_size" "search_type" \
--metadata "example_data/metadata.json" \
--retriever_type "single_document_context"
```
Examples of the metadata structure and the test set are in the "example_data" folder.

View file

@ -7,4 +7,8 @@ POSTGRES_PASSWORD = bla
POSTGRES_DB = bubu
POSTGRES_HOST = localhost
POSTGRES_HOST_DOCKER = postgres
SEGMENT_KEY = Etl4WJwzOkeDPAjaOXOMgyU16hO7mV7B
COG_ARCH_DIR = cognitive_architecture
GRAPH_DB_URL =
GRAPH_DB_PW =
GRAPH_DB_USER =

View file

@ -0,0 +1,15 @@
# TODO: add all classifiers here
# classify retrievals according to type of retrieval
def classify_retrieval():
pass
# classify documents according to type of document
def classify_call():
pass

View file

@ -0,0 +1,84 @@
import os
import json
import configparser
import uuid
from typing import Optional, List, Dict, Any
from dataclasses import dataclass, field
from pathlib import Path
from dotenv import load_dotenv
base_dir = Path(__file__).resolve().parent.parent
# Load the .env file from the base directory
dotenv_path = base_dir / '.env'
load_dotenv(dotenv_path=dotenv_path)
@dataclass
class Config:
# Paths and Directories
memgpt_dir: str = field(default_factory=lambda: os.getenv('COG_ARCH_DIR', 'cognitive_architecture'))
config_path: str = field(default_factory=lambda: os.path.join(os.getenv('COG_ARCH_DIR', 'cognitive_architecture'), 'config'))
# Model parameters
model: str = 'gpt-4-1106-preview'
model_endpoint: str = 'openai'
openai_key: Optional[str] = os.getenv('OPENAI_API_KEY')
# Embedding parameters
embedding_model: str = 'openai'
embedding_dim: int = 1536
embedding_chunk_size: int = 300
# Database parameters
graph_database_url: str = os.getenv('GRAPH_DB_URL')
graph_database_username: str = os.getenv('GRAPH_DB_USER')
graph_database_password: str = os.getenv('GRAPH_DB_PW')
# Client ID
anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)
def load(self):
"""Loads the configuration from a file or environment variables."""
config = configparser.ConfigParser()
config.read(self.config_path)
# Override with environment variables if they exist
for attr in self.__annotations__:
env_value = os.getenv(attr.upper())
if env_value is not None:
setattr(self, attr, env_value)
# Load from config file
if config.sections():
for section in config.sections():
for key, value in config.items(section):
if hasattr(self, key):
setattr(self, key, value)
def save(self):
"""Saves the current configuration to a file."""
config = configparser.ConfigParser()
# Save the current settings to the config file
for attr, value in self.__dict__.items():
# Attributes without an underscore (e.g. 'model') fall back to a 'general' section so the split cannot raise
section, option = attr.split('_', 1) if '_' in attr else ('general', attr)
if not config.has_section(section):
config.add_section(section)
config.set(section, option, str(value))
with open(self.config_path, 'w') as configfile:
config.write(configfile)
def to_dict(self) -> Dict[str, Any]:
"""Returns a dictionary representation of the configuration."""
return {attr: getattr(self, attr) for attr in self.__annotations__}
@classmethod
def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":
"""Creates a Config instance from a dictionary."""
config = cls()
for attr, value in config_dict.items():
if hasattr(config, attr):
setattr(config, attr, value)
return config
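# Usage sketch (illustrative, not part of this module; assumes the .env variables above are set):
#   config = Config()
#   config.load()                 # env vars take precedence, then the config file sections
#   print(config.model, config.graph_database_url)
#   restored = Config.from_dict(config.to_dict())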

View file

@ -23,4 +23,18 @@ async def session_scope(session):
async def add_entity(session, entity):
async with session_scope(session) as s: # Use your async session_scope
s.add(entity) # No need to commit; session_scope takes care of it
return "Successfully added entity"
return "Successfully added entity"
async def update_entity(session, model, entity_id, new_value):
async with session_scope(session) as s:
# Retrieve the entity from the database
entity = await s.get(model, entity_id)
if entity:
# Update the relevant column and 'updated_at' will be automatically updated
entity.operation_status = new_value
return "Successfully updated entity"
else:
return "Entity not found"

View file

@ -0,0 +1,588 @@
from pydantic import BaseModel
from enum import Enum
import typer
import os
import uuid
# import marvin
# from pydantic_settings import BaseSettings
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
# from marvin import ai_classifier
# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")
print(os.getcwd())
from ..models.sessions import (Session)
from ..models.testset import TestSet
from ..models.testoutput import TestOutput
from ..models.metadatas import MetaDatas
from ..models.operation import Operation
from ..models.docs import DocsModel
from ..models.memory import MemoryModel
from pathlib import Path
import networkx as nx
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.graphs import Neo4jGraph
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Neo4jVector
from dotenv import load_dotenv
from graphviz import Digraph
from ..database.database_crud import session_scope
from ..database.database import AsyncSessionLocal
import openai
import instructor
from abc import ABC, abstractmethod
from typing import List
# Adds response_model to ChatCompletion
# Allows the return of Pydantic model rather than raw JSON
instructor.patch()
from pydantic import BaseModel, Field
from typing import List
from ..utils import format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, create_node_variable_mapping
DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]
import questionary
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
from ..config import Config
config = Config()
config.load()
print(config.model)
print(config.openai_key)
import logging
#Execute Cypher queries to create the user and memory components if they don't exist
#
# graph.query(
# f"""
# // Ensure the User node exists
# MERGE (user:User {{ userId: {user} }})
#
# // Ensure the SemanticMemory node exists
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
# MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
#
# // Ensure the EpisodicMemory node exists
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
# MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
#
# // Ensure the Buffer node exists
# MERGE (buffer:Buffer {{ userId: {user} }})
# MERGE (user)-[:HAS_BUFFER]->(buffer)
# """
# )
#
# # Execute Cypher queries to create the cognitive components in the graph
# graph.query(
# f"""
# // Parsing the query into components and linking them to the user and memory components
# MERGE (user:User {{ userId: {user} }})
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
# MERGE (buffer:Buffer {{ userId: {user} }})
#
# CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
# CREATE (action2:Event {{ description: 'get information', source: 'book' }})
# CREATE (time:TimeContext {{ description: 'in the afternoon' }})
#
# WITH user, semantic, episodic, buffer, action1, action2, time
# CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
# CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
# CREATE (episodic)-[:HAS_EVENT]->(action1)
# CREATE (episodic)-[:HAS_EVENT]->(action2)
# CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
# """
# )
class Node(BaseModel):
id: int
description: str
category: str
color: str ="blue"
memory_type: str
class Edge(BaseModel):
source: int
target: int
description: str
color: str= "blue"
class KnowledgeGraph(BaseModel):
nodes: List[Node] = Field(default_factory=list)
edges: List[Edge] = Field(default_factory=list)
#
def generate_graph(input) -> KnowledgeGraph:
return openai.ChatCompletion.create(
model="gpt-4-1106-preview",
messages=[
{
"role": "user",
"content": f"""Use the given format to extract information from the following input: {input}. """,
},
{ "role":"system", "content": """You are a top-tier algorithm
designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person,
always label it as **"person"**.
Avoid using more specific terms like "mathematician" or "scientist".
- Include event, entity, time, or action nodes to the category.
- Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs.
Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information,
should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**:
Do not create separate nodes for dates or numerical values.
Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**:
When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times
in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph.
In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable,
so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination."""}
],
response_model=KnowledgeGraph,
)
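# Illustrative call (a sketch; instructor.patch() above lets ChatCompletion.create return
# the KnowledgeGraph pydantic model instead of raw JSON):
#   graph = generate_graph("I walked in the forest yesterday and need to buy some milk")
#   for node in graph.nodes:
#       print(node.id, node.description, node.memory_type)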
class AbstractGraphDB(ABC):
@abstractmethod
def query(self, query: str, params=None):
pass
# @abstractmethod
# def create_nodes(self, nodes: List[dict]):
# pass
#
# @abstractmethod
# def create_edges(self, edges: List[dict]):
# pass
#
# @abstractmethod
# def create_memory_type_relationships(self, nodes: List[dict], memory_type: str):
# pass
class Neo4jGraphDB(AbstractGraphDB):
def __init__(self, url, username, password):
self.graph = Neo4jGraph(url=url, username=username, password=password)
self.openai_key = config.openai_key
def query(self, query, params=None):
return self.graph.query(query, params)
# Initialize the Neo4j connection here
def create_base_cognitive_architecture(self, user_id: str):
# Create the user and memory components if they don't exist
user_memory_cypher = f"""
MERGE (user:User {{userId: '{user_id}'}})
MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
MERGE (buffer:Buffer {{userId: '{user_id}'}})
MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
MERGE (user)-[:HAS_BUFFER]->(buffer)
"""
return user_memory_cypher
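# Note: this method only builds the Cypher for the base User/SemanticMemory/EpisodicMemory/Buffer
# nodes; callers are expected to execute it themselves, e.g. (sketch):
#   neo4j_graph_db.query(neo4j_graph_db.create_base_cognitive_architecture(user_id))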
def user_query_to_edges_and_nodes(self, input: str) ->KnowledgeGraph:
return openai.ChatCompletion.create(
model=config.model,
messages=[
{
"role": "user",
"content": f"""Use the given format to extract information from the following input: {input}. """,
},
{"role": "system", "content": """You are a top-tier algorithm
designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person,
always label it as **"person"**.
Avoid using more specific terms like "mathematician" or "scientist".
- Include event, entity, time, or action nodes to the category.
- Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs.
Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information,
should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**:
Do not create separate nodes for dates or numerical values.
Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**:
When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times
in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph.
In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable,
so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination."""}
],
response_model=KnowledgeGraph,
)
def generate_create_statements_for_nodes_with_uuid(self, nodes, unique_mapping, base_node_mapping):
create_statements = []
for node in nodes:
original_variable_name = base_node_mapping[node['id']]
unique_variable_name = unique_mapping[original_variable_name]
node_label = node['category'].capitalize()
properties = {k: v for k, v in node.items() if k not in ['id', 'category']}
try:
properties = format_dict(properties)
except:
pass
create_statements.append(f"CREATE ({unique_variable_name}:{node_label} {properties})")
return create_statements
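# Illustrative output (hypothetical values): a node
#   {'id': 1, 'description': 'take a walk', 'category': 'event', 'color': 'blue', 'memory_type': 'episodic'}
# with base mapping {1: 'event1'} and unique mapping {'event1': 'event1_<hex>'} yields roughly:
#   CREATE (event1_<hex>:Event {description: 'take a walk', color: 'blue', memory_type: 'episodic'})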
# Update the function to generate Cypher CREATE statements for edges with unique variable names
def generate_create_statements_for_edges_with_uuid(self, edges, unique_mapping, base_node_mapping):
create_statements = []
with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
create_statements.append(with_statement)
for edge in edges:
# print("HERE IS THE EDGE", edge)
source_variable = unique_mapping[base_node_mapping[edge['source']]]
target_variable = unique_mapping[base_node_mapping[edge['target']]]
relationship = edge['description'].replace(" ", "_").upper()
create_statements.append(f"CREATE ({source_variable})-[:{relationship}]->({target_variable})")
return create_statements
def generate_memory_type_relationships_with_uuid_and_time_context(self, nodes, unique_mapping, base_node_mapping):
create_statements = []
with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
create_statements.append(with_statement)
# Loop through each node and create relationships based on memory_type
for node in nodes:
original_variable_name = base_node_mapping[node['id']]
unique_variable_name = unique_mapping[original_variable_name]
if node['memory_type'] == 'semantic':
create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_variable_name})")
elif node['memory_type'] == 'episodic':
create_statements.append(f"CREATE (episodic)-[:HAS_EVENT]->({unique_variable_name})")
if node['category'] == 'time':
create_statements.append(f"CREATE (buffer)-[:HAS_TIME_CONTEXT]->({unique_variable_name})")
# Assuming buffer holds all actions and times
# if node['category'] in ['action', 'time']:
create_statements.append(f"CREATE (buffer)-[:CURRENTLY_HOLDING]->({unique_variable_name})")
return create_statements
def generate_cypher_query_for_user_prompt_decomposition(self, user_id):
graph: KnowledgeGraph = generate_graph("I walked in the forest yesterday and added to my list I need to buy some milk in the store")
graph_dic = graph.dict()
node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
# Create unique variable names for each node
unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)
create_nodes_statements = self.generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping, node_variable_mapping)
create_edges_statements =self.generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping, node_variable_mapping)
memory_type_statements_with_uuid_and_time_context = self.generate_memory_type_relationships_with_uuid_and_time_context(
graph_dic['nodes'], unique_node_variable_mapping, node_variable_mapping)
# # Combine all statements
cypher_statements = [self.create_base_cognitive_architecture(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
cypher_statements_joined = "\n".join(cypher_statements)
return cypher_statements_joined
def update_user_query_for_user_prompt_decomposition(self, user_id, user_query):
pass
def delete_all_user_memories(self, user_id):
try:
# Check if the user exists
user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
if not user_exists:
return f"No user found with ID: {user_id}"
# Delete all memory nodes and relationships for the given user
delete_query = f"""
MATCH (user:User {{userId: '{user_id}'}})-[r]-()
DELETE r
WITH user
MATCH (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
MATCH (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
MATCH (user)-[:HAS_BUFFER]->(buffer)
DETACH DELETE semantic, episodic, buffer
"""
self.graph.query(delete_query)
return f"All memories deleted for user ID: {user_id}"
except Exception as e:
return f"An error occurred: {str(e)}"
def delete_specific_memory_type(self, user_id, memory_type):
try:
# Check if the user exists
user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
if not user_exists:
return f"No user found with ID: {user_id}"
# Validate memory type
if memory_type not in ['SemanticMemory', 'EpisodicMemory', 'Buffer']:
return "Invalid memory type. Choose from 'SemanticMemory', 'EpisodicMemory', or 'Buffer'."
# Delete specific memory type nodes and relationships for the given user
delete_query = f"""
MATCH (user:User {{userId: '{user_id}'}})-[:HAS_{memory_type.upper()}]->(memory)
DETACH DELETE memory
"""
self.graph.query(delete_query)
return f"{memory_type} deleted for user ID: {user_id}"
except Exception as e:
return f"An error occurred: {str(e)}"
def retrieve_semantic_memory(self, user_id: str):
query = """
MATCH (user:User {userId: $user_id})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)
MATCH (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
RETURN knowledge
"""
return self.query(query, params={"user_id": user_id})
def retrieve_episodic_memory(self, user_id: str):
query = """
MATCH (user:User {userId: $user_id})-[:HAS_EPISODIC_MEMORY]->(episodic:EpisodicMemory)
MATCH (episodic)-[:HAS_EVENT]->(event)
RETURN event
"""
return self.query(query, params={"user_id": user_id})
def retrieve_buffer_memory(self, user_id: str):
query = """
MATCH (user:User {userId: $user_id})-[:HAS_BUFFER]->(buffer:Buffer)
MATCH (buffer)-[:CURRENTLY_HOLDING]->(item)
RETURN item
"""
return self.query(query, params={"user_id": user_id})
def generate_graph_semantic_memory_document_summary(self, document_summary : str, unique_graphdb_mapping_values: dict, document_namespace: str):
""" This function takes a document and generates a document summary in Semantic Memory"""
create_statements = []
with_statement = f"WITH {', '.join(unique_graphdb_mapping_values.values())}, user, semantic, episodic, buffer"
create_statements.append(with_statement)
# Loop through each node and create relationships based on memory_type
create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_graphdb_mapping_values})")
return create_statements
def generate_document_summary(self, document_summary : str, unique_graphdb_mapping_values: dict, document_namespace: str):
""" This function takes a document and generates a document summary in Semantic Memory"""
# fetch namespace from postgres db
# fetch 1st and last page from vector store
# summarize the text, add document type
# write to postgres
create_statements = []
with_statement = f"WITH {', '.join(unique_graphdb_mapping_values.values())}, user, semantic, episodic, buffer"
create_statements.append(with_statement)
# Loop through each node and create relationships based on memory_type
create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_graphdb_mapping_values})")
return create_statements
def create_document_node_cypher(self, document_summary: dict, user_id: str) -> str:
"""
Generate a Cypher query to create a Document node linked to a SemanticMemory node for a user.
Parameters:
- document_summary (dict): A dictionary containing the document's category, title, and summary.
- user_id (str): The unique identifier for the user.
Returns:
- str: A Cypher query string with parameters.
Raises:
- ValueError: If any required data is missing or invalid.
"""
# Validate the input parameters
if not isinstance(document_summary, dict):
raise ValueError("The document_summary must be a dictionary.")
if not all(key in document_summary for key in ['document_category', 'title', 'summary']):
raise ValueError("The document_summary dictionary is missing required keys.")
if not isinstance(user_id, str) or not user_id:
raise ValueError("The user_id must be a non-empty string.")
# Escape single quotes in the document summary data (if not using parameters)
# title = document_summary['title'].replace("'", "\\'")
# summary = document_summary['summary'].replace("'", "\\'")
# document_category = document_summary['document_category'].replace("'", "\\'")
# Generate the Cypher query using parameters
cypher_query = f'''
// Ensure the User node exists
MERGE (user:User {{ userId: $user_id }})
// Ensure the SemanticMemory node exists and is connected to the User
MERGE (semantic:SemanticMemory {{ userId: $user_id }})
MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
// Create the Document node with its properties
CREATE (document:Document {{
title: $title,
summary: $summary,
documentCategory: $document_category
}})
// Link the Document node to the SemanticMemory node
CREATE (semantic)-[:HAS_DOCUMENT]->(document)
'''
return cypher_query
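# Usage sketch: the query above is parameterized, so run it through Neo4jGraphDB.query
# together with a matching parameter dict, e.g.
#   parameters = {
#       'user_id': user_id,
#       'title': document_summary['title'],
#       'summary': document_summary['summary'],
#       'document_category': document_summary['document_category'],
#   }
#   neo4j_graph_db.query(cypher_query, params=parameters)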
def update_document_node_with_namespace(self, user_id: str, vectordb_namespace: str):
# Generate the Cypher query
cypher_query = f'''
MATCH (user:User {{userId: $user_id}})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
SET document.vectordbNamespace = $vectordb_namespace
RETURN document
'''
# Parameters for the query
parameters = {
'user_id': user_id,
'vectordb_namespace': vectordb_namespace
}
# Execute the query with the provided parameters
result = self.query(cypher_query, parameters)
return result
class NetworkXGraphDB(AbstractGraphDB):
def __init__(self):
self.graph = nx.Graph()
# Initialize other necessary properties or configurations
def create_base_cognitive_architecture(self, user_id: str):
# Add nodes for user and memory types if they don't exist
self.graph.add_node(user_id, type='User')
self.graph.add_node(f"{user_id}_semantic", type='SemanticMemory')
self.graph.add_node(f"{user_id}_episodic", type='EpisodicMemory')
self.graph.add_node(f"{user_id}_buffer", type='Buffer')
# Add edges to connect user to memory types
self.graph.add_edge(user_id, f"{user_id}_semantic", relation='HAS_SEMANTIC_MEMORY')
self.graph.add_edge(user_id, f"{user_id}_episodic", relation='HAS_EPISODIC_MEMORY')
self.graph.add_edge(user_id, f"{user_id}_buffer", relation='HAS_BUFFER')
def delete_all_user_memories(self, user_id: str):
# Remove nodes and edges related to the user's memories
for memory_type in ['semantic', 'episodic', 'buffer']:
memory_node = f"{user_id}_{memory_type}"
self.graph.remove_node(memory_node)
def delete_specific_memory_type(self, user_id: str, memory_type: str):
# Remove a specific type of memory node and its related edges
memory_node = f"{user_id}_{memory_type.lower()}"
if memory_node in self.graph:
self.graph.remove_node(memory_node)
# Methods for retrieving semantic, episodic, and buffer memories
def retrieve_semantic_memory(self, user_id: str):
return [n for n in self.graph.neighbors(f"{user_id}_semantic")]
def retrieve_episodic_memory(self, user_id: str):
return [n for n in self.graph.neighbors(f"{user_id}_episodic")]
def retrieve_buffer_memory(self, user_id: str):
return [n for n in self.graph.neighbors(f"{user_id}_buffer")]
class GraphDBFactory:
def create_graph_db(self, db_type, **kwargs):
if db_type == 'neo4j':
return Neo4jGraphDB(**kwargs)
elif db_type == 'networkx':
return NetworkXGraphDB(**kwargs)
else:
raise ValueError(f"Unsupported database type: {db_type}")
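# Factory usage sketch (connection details are placeholders taken from the Config above):
#   graph_db = GraphDBFactory().create_graph_db(
#       'neo4j',
#       url=config.graph_database_url,
#       username=config.graph_database_username,
#       password=config.graph_database_password,
#   )
#   graph_db.query(graph_db.create_base_cognitive_architecture('user123'))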

View file

@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base
from ..database.database import Base
class DocsModel(Base):
__tablename__ = 'docs'

View file

@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base
from ..database.database import Base
class MemoryModel(Base):
__tablename__ = 'memories'

View file

@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base
from ..database.database import Base
class MetaDatas(Base):

View file

@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base
from ..database.database import Base
class Operation(Base):

View file

@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base
from ..database.database import Base
class Session(Base):

View file

@ -14,7 +14,7 @@ from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base
from ..database.database import Base
class TestOutput(Base):

View file

@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base
from ..database.database import Base
class TestSet(Base):

View file

@ -6,7 +6,7 @@ from sqlalchemy.ext.declarative import declarative_base
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from database.database import Base
from ..database.database import Base
class User(Base):

View file

@ -0,0 +1,10 @@
DEFAULT_PRESET = "cognitive_architecture_chat"
preset_options = [DEFAULT_PRESET]
def use_preset():
"""Placeholder for different present options"""
pass

View file

@ -0,0 +1,7 @@
from enum import Enum
class ChunkStrategy(Enum):
EXACT = 'exact'
PARAGRAPH = 'paragraph'
SENTENCE = 'sentence'
VANILLA = 'vanilla'
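# Usage sketch (illustrative; whether chunk_data accepts the enum member or its value is an assumption):
#   chunk_data(chunk_strategy=ChunkStrategy.PARAGRAPH, source_data=text, chunk_size=300, chunk_overlap=20)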

View file

@ -0,0 +1,79 @@
import uuid
from graphviz import Digraph
# from graph_database.graph import KnowledgeGraph
class Node:
def __init__(self, id, description, color):
self.id = id
self.description = description
self.color = color
class Edge:
def __init__(self, source, target, label, color):
self.source = source
self.target = target
self.label = label
self.color = color
# def visualize_knowledge_graph(kg: KnowledgeGraph):
# dot = Digraph(comment="Knowledge Graph")
#
# # Add nodes
# for node in kg.nodes:
# dot.node(str(node.id), node.description, color=node.color)
#
# # Add edges
# for edge in kg.edges:
# dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)
#
# # Render the graph
# dot.render("knowledge_graph.gv", view=True)
#
#
def format_dict(d):
# Initialize an empty list to store formatted items
formatted_items = []
# Iterate through all key-value pairs
for key, value in d.items():
# Format key-value pairs with a colon and space, and adding quotes for string values
formatted_item = f"{key}: '{value}'" if isinstance(value, str) else f"{key}: {value}"
formatted_items.append(formatted_item)
# Join all formatted items with a comma and a space
formatted_string = ", ".join(formatted_items)
# Add curly braces to mimic a dictionary
formatted_string = f"{{{formatted_string}}}"
return formatted_string
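# Example (illustrative): format_dict({'description': 'take a walk', 'color': 'blue'}) returns
#   "{description: 'take a walk', color: 'blue'}"
# i.e. a Cypher-style property map rather than valid JSON, which is why the generated
# CREATE statements can embed it directly.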
def append_uuid_to_variable_names(variable_mapping):
unique_variable_mapping = {}
for original_name in variable_mapping.values():
unique_name = f"{original_name}_{uuid.uuid4().hex}"
unique_variable_mapping[original_name] = unique_name
return unique_variable_mapping
# Update the functions to use the unique variable names
def create_node_variable_mapping(nodes):
mapping = {}
for node in nodes:
variable_name = f"{node['category']}{node['id']}".lower()
mapping[node['id']] = variable_name
return mapping
def create_edge_variable_mapping(edges):
mapping = {}
for edge in edges:
# Construct a unique identifier for the edge
variable_name = f"edge{edge['source']}to{edge['target']}".lower()
mapping[(edge['source'], edge['target'])] = variable_name
return mapping
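# Example (illustrative): create_node_variable_mapping([{'id': 1, 'category': 'Event'}])
# returns {1: 'event1'}; append_uuid_to_variable_names({1: 'event1'}) then yields something
# like {'event1': 'event1_3fa2...'}, keeping repeated categories unique within one query.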

View file

@ -4,7 +4,7 @@ from io import BytesIO
import os, sys
# Add the parent directory to sys.path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB
from ..vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB
import sqlalchemy as sa
logging.basicConfig(level=logging.INFO)
import marvin
@ -13,13 +13,13 @@ from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.retrievers import WeaviateHybridSearchRetriever
from weaviate.gql.get import HybridFusion
from models.sessions import Session
from models.testset import TestSet
from models.testoutput import TestOutput
from models.metadatas import MetaDatas
from models.operation import Operation
from ..models.sessions import Session
from ..models.testset import TestSet
from ..models.testoutput import TestOutput
from ..models.metadatas import MetaDatas
from ..models.operation import Operation
from sqlalchemy.orm import sessionmaker
from database.database import engine
from ..database.database import engine
load_dotenv()
from typing import Optional
import time

View file

@ -1,7 +1,7 @@
from langchain.document_loaders import PyPDFLoader
import sys, os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from shared.chunk_strategy import ChunkStrategy
from ..shared.chunk_strategy import ChunkStrategy
import re
def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_overlap=None):

View file

@ -9,23 +9,14 @@ from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
# from marvin import ai_classifier
# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")
DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]
import questionary
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
def create_config_dir():
if not os.path.exists(PROMETHAI_DIR):
os.makedirs(PROMETHAI_DIR, exist_ok=True)
folders = ["personas", "humans", "archival", "agents"]
for folder in folders:
if not os.path.exists(os.path.join(PROMETHAI_DIR, folder)):
os.makedirs(os.path.join(PROMETHAI_DIR, folder))
from cognitive_architecture.models.sessions import Session
from cognitive_architecture.models.testset import TestSet
from cognitive_architecture.models.testoutput import TestOutput
from cognitive_architecture.models.metadatas import MetaDatas
from cognitive_architecture.models.operation import Operation
from cognitive_architecture.models.docs import DocsModel
from cognitive_architecture.models.memory import MemoryModel
from pathlib import Path
@ -40,14 +31,8 @@ import uuid
from graphviz import Digraph
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
txt_path = "dune.txt"
from cognitive_architecture.database.database_crud import session_scope
from cognitive_architecture.database.database import AsyncSessionLocal
import openai
import instructor
@ -57,333 +42,188 @@ import instructor
instructor.patch()
from pydantic import BaseModel, Field
from typing import List
DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]
import questionary
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
load_dotenv()
class Node(BaseModel):
id: int
description: str
category: str
color: str ="blue"
memory_type: str
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
from cognitive_architecture.config import Config
config = Config()
config.load()
print(config.model)
print(config.openai_key)
class Edge(BaseModel):
source: int
target: int
description: str
color: str= "blue"
class KnowledgeGraph(BaseModel):
nodes: List[Node] = Field(..., default_factory=list)
edges: List[Edge] = Field(..., default_factory=list)
#
def generate_graph(input) -> KnowledgeGraph:
return openai.ChatCompletion.create(
model="gpt-4-1106-preview",
messages=[
{
"role": "user",
"content": f"""Use the given format to extract information from the following input: {input}. """,
},
{ "role":"system", "content": """You are a top-tier algorithm
designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person,
always label it as **"person"**.
Avoid using more specific terms like "mathematician" or "scientist".
- Include event, entity, time, or action nodes to the category.
- Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs.
Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information,
should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**:
Do not create separate nodes for dates or numerical values.
Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**:
When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times
in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph.
In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable,
so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination."""}
],
response_model=KnowledgeGraph,
)
import logging
def execute_cypher_query(query: str):
graph_ = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="pleaseletmein")
graph_.query(query)
# This is a placeholder for the logic that will execute the Cypher query
# You would replace this with the actual logic to run the query in your Neo4j database
print(query)
#Execute Cypher queries to create the user and memory components if they don't exist
#
# graph.query(
# f"""
# // Ensure the User node exists
# MERGE (user:User {{ userId: {user} }})
#
# // Ensure the SemanticMemory node exists
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
# MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
#
# // Ensure the EpisodicMemory node exists
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
# MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
#
# // Ensure the Buffer node exists
# MERGE (buffer:Buffer {{ userId: {user} }})
# MERGE (user)-[:HAS_BUFFER]->(buffer)
# """
# )
#
# # Execute Cypher queries to create the cognitive components in the graph
# graph.query(
# f"""
# // Parsing the query into components and linking them to the user and memory components
# MERGE (user:User {{ userId: {user} }})
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
# MERGE (buffer:Buffer {{ userId: {user} }})
#
# CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
# CREATE (action2:Event {{ description: 'get information', source: 'book' }})
# CREATE (time:TimeContext {{ description: 'in the afternoon' }})
#
# WITH user, semantic, episodic, buffer, action1, action2, time
# CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
# CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
# CREATE (episodic)-[:HAS_EVENT]->(action1)
# CREATE (episodic)-[:HAS_EVENT]->(action2)
# CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
# """
# )
import asyncio
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
class Node:
def __init__(self, id, description, color):
self.id = id
self.description = description
self.color = color
async def get_vectordb_namespace(session: AsyncSession, user_id: str):
try:
result = await session.execute(
select(MemoryModel.id).where(MemoryModel.user_id == user_id).order_by(MemoryModel.created_at.desc()).limit(1)
)
namespace = result.scalar_one_or_none()
return namespace
except Exception as e:
logging.error(f"An error occurred while retrieving the Vectordb_namespace: {str(e)}")
return None
class Edge:
def __init__(self, source, target, label, color):
self.source = source
self.target = target
self.label = label
self.color = color
def visualize_knowledge_graph(kg: KnowledgeGraph):
dot = Digraph(comment="Knowledge Graph")
# Add nodes
for node in kg.nodes:
dot.node(str(node.id), node.description, color=node.color)
# Add edges
for edge in kg.edges:
dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)
# Render the graph
dot.render("knowledge_graph.gv", view=True)
# async def retrieve_job_by_id(session, user_id, job_id):
# try:
# result = await session.execute(
# session.query(Session.id)
# .filter_by(user_id=user_id, id=job_id)
# .order_by(Session.created_at)
# )
# return result.scalar_one_or_none()
# except Exception as e:
# logging.error(f"An error occurred while retrieving the job: {str(e)}")
# return None
def create_base_queries_from_user( user_id: str):
# Create the user and memory components if they don't exist
user_memory_cypher = f"""
MERGE (user:User {{userId: '{user_id}'}})
MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
MERGE (buffer:Buffer {{userId: '{user_id}'}})
MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
MERGE (user)-[:HAS_BUFFER]->(buffer)
async def update_document_vectordb_namespace(postgres_session: AsyncSession, user_id: str, namespace: str = None):
"""
Update the Document node with the Vectordb_namespace for the given user. If the namespace is not provided,
it will be retrieved from the PostgreSQL database.
return user_memory_cypher
Args:
postgres_session (AsyncSession): The async session for connecting to the PostgreSQL database.
user_id (str): The user's unique identifier.
namespace (str, optional): The Vectordb_namespace. If None, it will be retrieved from the database.
# Function to append a UUID4 to the variable names to ensure uniqueness
def append_uuid_to_variable_names(variable_mapping):
unique_variable_mapping = {}
for original_name in variable_mapping.values():
unique_name = f"{original_name}_{uuid.uuid4().hex}"
unique_variable_mapping[original_name] = unique_name
return unique_variable_mapping
Returns:
The result of the update operation or None if an error occurred.
"""
vectordb_namespace = namespace
# Update the functions to use the unique variable names
def create_node_variable_mapping(nodes):
mapping = {}
for node in nodes:
variable_name = f"{node['category']}{node['id']}".lower()
mapping[node['id']] = variable_name
return mapping
# Retrieve namespace from the database if not provided
if vectordb_namespace is None:
vectordb_namespace = await get_vectordb_namespace(postgres_session, user_id)
if not vectordb_namespace:
logging.error("Vectordb_namespace could not be retrieved.")
return None
def create_edge_variable_mapping(edges):
mapping = {}
for edge in edges:
# Construct a unique identifier for the edge
variable_name = f"edge{edge['source']}to{edge['target']}".lower()
mapping[(edge['source'], edge['target'])] = variable_name
return mapping
# Update the function to generate Cypher CREATE statements for nodes with unique variable names
def format_dict(d):
# Initialize an empty list to store formatted items
formatted_items = []
# Iterate through all key-value pairs
for key, value in d.items():
# Format key-value pairs with a colon and space, and adding quotes for string values
formatted_item = f"{key}: '{value}'" if isinstance(value, str) else f"{key}: {value}"
formatted_items.append(formatted_item)
# Join all formatted items with a comma and a space
formatted_string = ", ".join(formatted_items)
# Add curly braces to mimic a dictionary
formatted_string = f"{{{formatted_string}}}"
return formatted_string
def generate_create_statements_for_nodes_with_uuid(nodes, unique_mapping):
create_statements = []
for node in nodes:
original_variable_name = node_variable_mapping[node['id']]
unique_variable_name = unique_mapping[original_variable_name]
node_label = node['category'].capitalize()
properties = {k: v for k, v in node.items() if k not in ['id', 'category']}
try:
properties = format_dict(properties)
except:
pass
create_statements.append(f"CREATE ({unique_variable_name}:{node_label} {properties})")
return create_statements
# Update the function to generate Cypher CREATE statements for edges with unique variable names
def generate_create_statements_for_edges_with_uuid(edges, unique_mapping):
create_statements = []
with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
create_statements.append(with_statement)
for edge in edges:
# print("HERE IS THE EDGE", edge)
source_variable = unique_mapping[node_variable_mapping[edge['source']]]
target_variable = unique_mapping[node_variable_mapping[edge['target']]]
relationship = edge['description'].replace(" ", "_").upper()
create_statements.append(f"CREATE ({source_variable})-[:{relationship}]->({target_variable})")
return create_statements
# Update the function to generate Cypher CREATE statements for memory type relationships with unique variable names
def generate_memory_type_relationships_with_uuid_and_time_context(nodes, unique_mapping):
create_statements = []
with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
create_statements.append(with_statement)
# Loop through each node and create relationships based on memory_type
for node in nodes:
original_variable_name = node_variable_mapping[node['id']]
unique_variable_name = unique_mapping[original_variable_name]
if node['memory_type'] == 'semantic':
create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_variable_name})")
elif node['memory_type'] == 'episodic':
create_statements.append(f"CREATE (episodic)-[:HAS_EVENT]->({unique_variable_name})")
if node['category'] == 'time':
create_statements.append(f"CREATE (buffer)-[:HAS_TIME_CONTEXT]->({unique_variable_name})")
# Assuming buffer holds all actions and times
# if node['category'] in ['action', 'time']:
create_statements.append(f"CREATE (buffer)-[:CURRENTLY_HOLDING]->({unique_variable_name})")
return create_statements
# Update the Document node in Neo4j with the namespace
update_result = update_document_node_with_namespace(user_id, vectordb_namespace)
return update_result
# Main execution logic
if __name__ == "__main__":
user_id = "User1"
query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"
# Generate the knowledge graph from the user input
knowledge_graph = generate_graph(query_input)
visualize_knowledge_graph(knowledge_graph)
# out = knowledge_graph.dict()
# print(out)
# query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"
#
graph: KnowledgeGraph = generate_graph("I walked in the forest yesterday and added to my list I need to buy some milk in the store")
graph_dic = graph.dict()
node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
# Create unique variable names for each node
unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)
create_nodes_statements = generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping)
create_edges_statements = generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping)
memory_type_statements_with_uuid_and_time_context = generate_memory_type_relationships_with_uuid_and_time_context(
graph_dic['nodes'], unique_node_variable_mapping)
# # Combine all statements
cypher_statements = [create_base_queries_from_user(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
cypher_statements_joined = "\n".join(cypher_statements)
execute_cypher_query(cypher_statements_joined)
# Translate the KnowledgeGraph into Cypher queries
# Make document summary in Semantic Memory
# Document summary links to a Namespace in Vector Store
# Categorize document types in Semantic Memory
# Make a spine classifier that retrieves the relevant document namespaces from Vector Store
# # Generate the knowledge graph from the user input
# knowledge_graph = generate_graph(query_input)
# visualize_knowledge_graph(knowledge_graph)
# # out = knowledge_graph.dict()
# # print(out)
# #
# graph: KnowledgeGraph = generate_graph("I walked in the forest yesterday and added to my list I need to buy some milk in the store")
# graph_dic = graph.dict()
#
# Connect document summary to chunks in Weaviate vector store
# node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
# edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
# # Create unique variable names for each node
# unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
# unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)
# create_nodes_statements = generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping)
# create_edges_statements = generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping)
#
# memory_type_statements_with_uuid_and_time_context = generate_memory_type_relationships_with_uuid_and_time_context(
# graph_dic['nodes'], unique_node_variable_mapping)
#
# # # Combine all statements
# cypher_statements = [create_base_queries_from_user(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
# cypher_statements_joined = "\n".join(cypher_statements)
#
#
#
# execute_cypher_query(cypher_statements_joined)
# print(cypher_query)
# #
# # # Execute the Cypher queries to create the graph in Neo4j
# execute_cypher_query(cypher_query)
# # Refresh the graph schema
# graph.refresh_schema()
# bartleby_summary = {
# "document_category": "Classic Literature",
# "title": "Bartleby, the Scrivener",
# "summary": (
# "Bartleby, the Scrivener: A Story of Wall Street' is a short story by Herman Melville "
# "that tells the tale of Bartleby, a scrivener, or copyist, who works for a Manhattan "
# "lawyer. Initially, Bartleby is a competent and industrious worker. However, one day, "
# "when asked to proofread a document, he responds with what becomes his constant refrain "
# "to any request: 'I would prefer not to.' As the story progresses, Bartleby becomes "
# "increasingly passive, refusing not just work but also food and eventually life itself, "
# "as he spirals into a state of passive resistance. The lawyer, the narrator of the story, "
# "is both fascinated and frustrated by Bartleby's behavior. Despite attempts to understand "
# "and help him, Bartleby remains an enigmatic figure, his motives and thoughts unexplained. "
# "He is eventually evicted from the office and later found dead in a prison yard, having "
# "preferred not to live. The story is a meditation on the themes of isolation, societal "
# "obligation, and the inexplicable nature of human behavior."
# )
# }
# rs = create_document_node_cypher(bartleby_summary, user_id)
#
# parameters = {
# 'user_id': user_id,
# 'title': bartleby_summary['title'],
# 'summary': bartleby_summary['summary'],
# 'document_category': bartleby_summary['document_category']
# }
#
# execute_cypher_query(rs, parameters)
#
# # Print the schema to the console
# print(graph.schema)
# async def main():
# user_id = "User1"
#
# async with session_scope(AsyncSessionLocal()) as session:
# await update_document_vectordb_namespace(session, user_id)
#
# # print(rs)
#
# if __name__ == "__main__":
# import asyncio
#
# asyncio.run(main())
#
# # config = Config()
# # config.load()
# #
# # print(config.model)
# # print(config.openai_key)
async def main():
user_id = "User1"
from cognitive_architecture.graph_database.graph import Neo4jGraphDB
# Example initialization (replace with your actual connection details)
neo4j_graph_db = Neo4jGraphDB(url='bolt://localhost:7687', username='neo4j', password='pleaseletmein')
# Generate the Cypher query for a specific user
user_id = 'user123' # Replace with the actual user ID
cypher_query = neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id)
# Execute the generated Cypher query
result = neo4j_graph_db.query(cypher_query)
# async with session_scope(AsyncSessionLocal()) as session:
# await update_document_vectordb_namespace(session, user_id)
# print(rs)
if __name__ == "__main__":
import asyncio
asyncio.run(main())