"""Turn free-text user input into a knowledge graph and store it in Neo4j.

The first half of the module declares the pydantic schema the LLM must fill
in (`KnowledgeGraph`), the extraction call itself (`generate_graph`) and a
thin Neo4j execution helper (`execute_cypher_query`).
"""
import os
import uuid
from enum import Enum
from pathlib import Path
from typing import List

import questionary
import typer
from dotenv import load_dotenv
from graphviz import Digraph
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.graphs import Neo4jGraph
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Neo4jVector
from pydantic import BaseModel, Field

# import marvin
# from pydantic_settings import BaseSettings
# from marvin import ai_classifier
# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")

# Load .env *before* importing openai, exactly as the original ordering did
# (presumably so a key stored in .env is visible when the client is imported).
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

import instructor  # noqa: E402
import openai  # noqa: E402

# Adds response_model to ChatCompletion.
# Allows the return of a Pydantic model rather than raw JSON.
instructor.patch()

DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]

PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")

txt_path = "dune.txt"


def create_config_dir():
    """Create ``PROMETHAI_DIR`` and its standard sub-folders if missing.

    ``exist_ok=True`` makes the call idempotent without a separate
    existence check.
    """
    os.makedirs(PROMETHAI_DIR, exist_ok=True)
    for folder in ("personas", "humans", "archival", "agents"):
        os.makedirs(os.path.join(PROMETHAI_DIR, folder), exist_ok=True)


class Node(BaseModel):
    """One extracted entity/concept of the knowledge graph."""

    id: int
    description: str
    category: str
    color: str = "blue"
    memory_type: str


class Edge(BaseModel):
    """A directed, described relation between two nodes (by node id)."""

    source: int
    target: int
    description: str
    color: str = "blue"


class KnowledgeGraph(BaseModel):
    """The structured response the LLM is asked to produce."""

    # Only default_factory is given: combining it with a "required" marker
    # (``...``) is contradictory and rejected by some pydantic versions.
    nodes: List[Node] = Field(default_factory=list)
    edges: List[Edge] = Field(default_factory=list)


# NOTE(review): the prompt tells the model never to use integers as node IDs,
# while ``Node.id`` is declared as ``int``. The wording is left unchanged here;
# confirm which of the two is intended.
_SYSTEM_PROMPT = """You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
  - Include event, entity, time, or action nodes to the category.
  - Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination."""


def generate_graph(input) -> KnowledgeGraph:
    """Ask the chat model to extract a `KnowledgeGraph` from *input*.

    Args:
        input: Free text to extract nodes and edges from (the parameter name
            is kept for backward compatibility although it shadows a builtin).

    Returns:
        Whatever the patched ``openai.ChatCompletion.create`` returns for
        ``response_model=KnowledgeGraph``.
    """
    return openai.ChatCompletion.create(
        model="gpt-4-1106-preview",
        messages=[
            {
                "role": "user",
                "content": f"""Use the given format to extract information from the following input: {input}. """,
            },
            {
                "role": "system",
                "content": _SYSTEM_PROMPT,
            },
        ],
        response_model=KnowledgeGraph,
    )


def execute_cypher_query(
    query: str,
    url: str = "bolt://localhost:7687",
    username: str = "neo4j",
    password: str = "pleaseletmein",
):
    """Run *query* against Neo4j and echo it to stdout.

    Args:
        query: Cypher text to execute.
        url: Bolt URL of the Neo4j instance.
        username: Neo4j user name.
        password: Neo4j password.

    Returns:
        The value returned by ``Neo4jGraph.query`` (previously discarded).
    """
    graph_ = Neo4jGraph(url=url, username=username, password=password)
    result = graph_.query(query)
    # Echo the executed statement so a run can be inspected from the console.
    print(query)
    return result


# Execute Cypher queries to create the user and memory components if they don't exist
#
# graph.query(
#     f"""
#     // Ensure the User node exists
#     MERGE (user:User {{ userId: {user} }})
#
#     // Ensure the SemanticMemory node exists
#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
#     MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
#
#     // Ensure the EpisodicMemory node exists
#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
#     MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
#
#     // Ensure the Buffer node exists
#     MERGE (buffer:Buffer {{ userId: {user} }})
#     MERGE (user)-[:HAS_BUFFER]->(buffer)
#     """
# )
#
# # Execute Cypher queries to create the cognitive components in the graph
# graph.query(
#     f"""
#     // Parsing the query into components and linking them to the user and memory components
#     MERGE (user:User {{ userId: {user} }})
#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
#     MERGE (buffer:Buffer {{ userId: {user} }})
#
#     CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
#     CREATE (action2:Event {{ description: 'get information', source: 'book' }})
#     CREATE (time:TimeContext {{ description: 'in the afternoon' }})
#
#     WITH user, semantic, episodic, buffer, action1, action2, time
#     CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
#     CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
#     CREATE (episodic)-[:HAS_EVENT]->(action1)
#     CREATE (episodic)-[:HAS_EVENT]->(action2)
#     CREATE
#     (episodic)-[:HAS_TIME_CONTEXT]->(time)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
#     """
# )


# NOTE(review): these two plain classes rebind the module-level names ``Node``
# and ``Edge`` that are defined earlier in this file as pydantic models.
# Nothing in this part of the file instantiates them; confirm whether they
# are still needed.
class Node:
    """Plain container for a graph node (id, description, color)."""

    def __init__(self, id, description, color):
        self.id = id
        self.description = description
        self.color = color


class Edge:
    """Plain container for a graph edge (source, target, label, color)."""

    def __init__(self, source, target, label, color):
        self.source = source
        self.target = target
        self.label = label
        self.color = color


def visualize_knowledge_graph(kg: "KnowledgeGraph"):
    """Render *kg* with graphviz to ``knowledge_graph.gv`` and open the viewer.

    Args:
        kg: Object exposing ``nodes`` (with ``id``, ``description``,
            ``color``) and ``edges`` (with ``source``, ``target``,
            ``description``, ``color``).
    """
    dot = Digraph(comment="Knowledge Graph")

    # Add nodes
    for node in kg.nodes:
        dot.node(str(node.id), node.description, color=node.color)

    # Add edges
    for edge in kg.edges:
        dot.edge(
            str(edge.source),
            str(edge.target),
            label=edge.description,
            color=edge.color,
        )

    # Render the graph
    dot.render("knowledge_graph.gv", view=True)


def _cypher_string_literal(value):
    """Return *value* as a single-quoted Cypher string literal.

    Backslashes and single quotes are escaped so that text such as
    ``John's`` cannot terminate the literal early.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def _cypher_literal(value):
    """Return the Cypher literal for a scalar property value."""
    if isinstance(value, str):
        return _cypher_string_literal(value)
    # bool must be tested explicitly: Python's ``True``/``None`` spellings
    # are not valid Cypher.
    if isinstance(value, bool):
        return "true" if value else "false"
    if value is None:
        return "null"
    return str(value)


def _with_clause(unique_mapping):
    """Build the ``WITH`` clause carrying node variables and memory anchors.

    Joining one combined list avoids a dangling comma when
    *unique_mapping* is empty.
    """
    names = [*unique_mapping.values(), "user", "semantic", "episodic", "buffer"]
    return f"WITH {', '.join(names)}"


def _relationship_type(description):
    """Derive an upper-case relationship type from free text.

    Every character that is not alphanumeric or ``_`` (spaces, hyphens,
    quotes, ...) becomes ``_``.
    """
    cleaned = "".join(ch if ch.isalnum() or ch == "_" else "_" for ch in description)
    return cleaned.upper()


def _resolve_node_variable_mapping(explicit, nodes=None):
    """Pick the node-id -> variable-name mapping to use.

    Order of preference: the explicit argument, then a module-level
    ``node_variable_mapping`` (the behaviour callers relied on before the
    keyword argument existed), then one derived from *nodes*.

    Raises:
        NameError: If none of the three sources is available.
    """
    if explicit is not None:
        return explicit
    module_level = globals().get("node_variable_mapping")
    if module_level is not None:
        return module_level
    if nodes is not None:
        return create_node_variable_mapping(nodes)
    raise NameError("name 'node_variable_mapping' is not defined")


def create_base_queries_from_user(user_id: str):
    """Return Cypher that MERGEs the user and its three memory components.

    Args:
        user_id: Identifier stored as ``userId`` on every merged node; it is
            escaped before being embedded in the query text.

    Returns:
        A multi-line Cypher string binding ``user``, ``semantic``,
        ``episodic`` and ``buffer``.
    """
    uid = _cypher_string_literal(user_id)
    # Create the user and memory components if they don't exist
    user_memory_cypher = f"""
    MERGE (user:User {{userId: {uid}}})
    MERGE (semantic:SemanticMemory {{userId: {uid}}})
    MERGE (episodic:EpisodicMemory {{userId: {uid}}})
    MERGE (buffer:Buffer {{userId: {uid}}})
    MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
    MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
    MERGE (user)-[:HAS_BUFFER]->(buffer)
    """
    return user_memory_cypher


def append_uuid_to_variable_names(variable_mapping):
    """Map every value of *variable_mapping* to ``<value>_<uuid4 hex>``.

    Args:
        variable_mapping: Mapping whose *values* are base variable names.

    Returns:
        dict from each base name to a unique name with a UUID4 suffix.
    """
    return {
        original_name: f"{original_name}_{uuid.uuid4().hex}"
        for original_name in variable_mapping.values()
    }


def create_node_variable_mapping(nodes):
    """Map each node id to the variable name ``<category><id>`` (lower-cased)."""
    return {node["id"]: f"{node['category']}{node['id']}".lower() for node in nodes}


def create_edge_variable_mapping(edges):
    """Map each ``(source, target)`` pair to ``edge<source>to<target>``."""
    return {
        (edge["source"], edge["target"]): f"edge{edge['source']}to{edge['target']}".lower()
        for edge in edges
    }


def format_dict(d):
    """Format *d* as a Cypher property map, e.g. ``{name: 'x', age: 3}``.

    String values are single-quoted with quotes/backslashes escaped;
    ``bool`` and ``None`` become ``true``/``false``/``null``; any other
    value is emitted via ``str``.
    """
    formatted_items = [f"{key}: {_cypher_literal(value)}" for key, value in d.items()]
    # Add curly braces to mimic a dictionary
    return f"{{{', '.join(formatted_items)}}}"


def generate_create_statements_for_nodes_with_uuid(nodes, unique_mapping, node_variable_mapping=None):
    """Build one ``CREATE`` statement per node.

    Args:
        nodes: Node dicts with at least ``id`` and ``category``; all other
            keys become node properties.
        unique_mapping: Base variable name -> unique variable name.
        node_variable_mapping: Optional node id -> base variable name. When
            omitted, a module-level ``node_variable_mapping`` is used if one
            exists, otherwise the mapping is derived from *nodes*.

    Returns:
        list of Cypher ``CREATE`` statements.
    """
    id_to_name = _resolve_node_variable_mapping(node_variable_mapping, nodes)
    create_statements = []
    for node in nodes:
        unique_variable_name = unique_mapping[id_to_name[node["id"]]]
        node_label = node["category"].capitalize()
        properties = format_dict(
            {k: v for k, v in node.items() if k not in ("id", "category")}
        )
        create_statements.append(
            f"CREATE ({unique_variable_name}:{node_label} {properties})"
        )
    return create_statements


def generate_create_statements_for_edges_with_uuid(edges, unique_mapping, node_variable_mapping=None):
    """Build a ``WITH`` clause followed by one ``CREATE`` per edge.

    Args:
        edges: Edge dicts with ``source``, ``target`` and ``description``.
        unique_mapping: Base variable name -> unique variable name.
        node_variable_mapping: Optional node id -> base variable name; falls
            back to a module-level ``node_variable_mapping``.

    Returns:
        list of Cypher statements, the ``WITH`` clause first.

    Raises:
        NameError: If edges are given but no node mapping can be found.
    """
    create_statements = [_with_clause(unique_mapping)]
    if not edges:
        return create_statements

    id_to_name = _resolve_node_variable_mapping(node_variable_mapping)
    for edge in edges:
        source_variable = unique_mapping[id_to_name[edge["source"]]]
        target_variable = unique_mapping[id_to_name[edge["target"]]]
        relationship = _relationship_type(edge["description"])
        create_statements.append(
            f"CREATE ({source_variable})-[:{relationship}]->({target_variable})"
        )
    return create_statements


def generate_memory_type_relationships_with_uuid_and_time_context(nodes, unique_mapping, node_variable_mapping=None):
    """Link every node to the semantic/episodic memory and to the buffer.

    Args:
        nodes: Node dicts with ``id``, ``category`` and ``memory_type``.
        unique_mapping: Base variable name -> unique variable name.
        node_variable_mapping: Optional node id -> base variable name. When
            omitted, a module-level ``node_variable_mapping`` is used if one
            exists, otherwise the mapping is derived from *nodes*.

    Returns:
        list of Cypher statements, the ``WITH`` clause first.
    """
    id_to_name = _resolve_node_variable_mapping(node_variable_mapping, nodes)
    create_statements = [_with_clause(unique_mapping)]

    # Loop through each node and create relationships based on memory_type
    for node in nodes:
        unique_variable_name = unique_mapping[id_to_name[node["id"]]]
        if node["memory_type"] == "semantic":
            create_statements.append(
                f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_variable_name})"
            )
        elif node["memory_type"] == "episodic":
            create_statements.append(
                f"CREATE (episodic)-[:HAS_EVENT]->({unique_variable_name})"
            )
            # NOTE(review): nesting of the time check under the episodic
            # branch is reconstructed from collapsed source — confirm.
            if node["category"] == "time":
                create_statements.append(
                    f"CREATE (buffer)-[:HAS_TIME_CONTEXT]->({unique_variable_name})"
                )

        # Assuming buffer holds all actions and times
        # if node['category'] in ['action', 'time']:
        create_statements.append(
            f"CREATE (buffer)-[:CURRENTLY_HOLDING]->({unique_variable_name})"
        )

    return create_statements


# Main execution logic
if __name__ == "__main__":
    user_id = "User1"
    query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"

    # Generate the knowledge graph from the user input
    knowledge_graph = generate_graph(query_input)
    visualize_knowledge_graph(knowledge_graph)

    # The extracted graph is bound to ``knowledge_graph``; no other graph
    # variable exists in this script.
    graph_dic = knowledge_graph.dict()

    node_variable_mapping = create_node_variable_mapping(graph_dic["nodes"])

    # Create unique variable names for each node
    unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)

    create_nodes_statements = generate_create_statements_for_nodes_with_uuid(
        graph_dic["nodes"],
        unique_node_variable_mapping,
        node_variable_mapping=node_variable_mapping,
    )
    create_edges_statements = generate_create_statements_for_edges_with_uuid(
        graph_dic["edges"],
        unique_node_variable_mapping,
        node_variable_mapping=node_variable_mapping,
    )
    memory_type_statements_with_uuid_and_time_context = (
        generate_memory_type_relationships_with_uuid_and_time_context(
            graph_dic["nodes"],
            unique_node_variable_mapping,
            node_variable_mapping=node_variable_mapping,
        )
    )

    # Combine all statements
    cypher_statements = (
        [create_base_queries_from_user(user_id)]
        + create_nodes_statements
        + create_edges_statements
        + memory_type_statements_with_uuid_and_time_context
    )
    cypher_statements_joined = "\n".join(cypher_statements)

    execute_cypher_query(cypher_statements_joined)

    # TODO: Translate the KnowledgeGraph into Cypher queries
    # TODO: Make document summary in Semantic Memory
    # TODO: Document summary links to a Namespace in Vector Store
    # TODO: Categorize document types in Semantic Memory
    # TODO: Make a spine classifier that retrieves the relevant document
    #       namespaces from Vector Store
    # TODO: Connect document summary to chunks in Weaviate vector store

    # print(cypher_query)
    #
    # # Execute the Cypher queries to create the graph in Neo4j
    # execute_cypher_query(cypher_query)
    # # Refresh the graph schema
    # graph.refresh_schema()
    #
    # # Print the schema to the console
    # print(graph.schema)