"""Prototype: extract a knowledge graph from text with an LLM and store it in Neo4j."""

import os
import uuid
from enum import Enum
from pathlib import Path
from typing import List

import questionary
import typer
from dotenv import load_dotenv
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.graphs import Neo4jGraph
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Neo4jVector
from pydantic import BaseModel, Field

# import marvin
# from pydantic_settings import BaseSettings
# from marvin import ai_classifier
# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")

# The original module called load_dotenv() *before* importing openai and
# instructor; that order is kept on purpose (presumably so that a key stored
# only in .env is visible when the OpenAI SDK is imported -- TODO confirm).
load_dotenv()

import openai  # noqa: E402
import instructor  # noqa: E402

# Adds response_model to ChatCompletion
# Allows the return of Pydantic model rather than raw JSON
instructor.patch()

DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]

# NOTE(review): joining the home directory with "." yields the home directory
# itself, so the folders below are created directly under "~". A hidden
# directory name was probably intended here -- TODO confirm before changing.
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
txt_path = "dune.txt"

# Connection settings can be overridden through the environment; the defaults
# are the values that used to be hard-coded here.
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "pleaseletmein"),
)


def create_config_dir():
    """Create PROMETHAI_DIR and its standard sub-folders if they are missing.

    Raises:
        FileExistsError: if one of the paths exists but is not a directory.
    """
    os.makedirs(PROMETHAI_DIR, exist_ok=True)
    for folder in ("personas", "humans", "archival", "agents"):
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(os.path.join(PROMETHAI_DIR, folder), exist_ok=True)


class Node(BaseModel):
    """A node of the extracted knowledge graph."""

    id: int
    description: str
    category: str
    color: str = "blue"
    memory_type: str


#
# class EntityNode(BaseModel):
#     id: int
#     description: str
#
#
# class TimeContextNode(BaseModel):
#     id: int
#     description: str
#
#
# class ActionNode(BaseModel):
#     id: int
#     description: str


class Edge(BaseModel):
    """A directed edge between two nodes, referenced by their ids."""

    source: int
    target: int
    description: str
    color: str = "blue"


class KnowledgeGraph(BaseModel):
    """Container for the nodes and edges returned by the LLM."""

    # `Field(..., default_factory=list)` mixed the "required" marker with a
    # default factory; only the factory is kept so both lists default to empty.
    nodes: List[Node] = Field(default_factory=list)
    edges: List[Edge] = Field(default_factory=list)


# NOTE(review): the "Node IDs" rule below forbids integer ids, while the Node
# and Edge models above declare integer ids. The prompt text is left untouched
# here; one of the two should be aligned with the other.
_GRAPH_EXTRACTION_SYSTEM_PROMPT = """You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
  - Include event, entity, time, or action nodes to the category.
  - Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination."""


def generate_graph(input) -> KnowledgeGraph:
    """Ask the chat model to turn *input* into a KnowledgeGraph.

    Args:
        input: Free text to extract nodes and edges from. (The name shadows
            the builtin but is kept so keyword callers keep working.)

    Returns:
        The KnowledgeGraph produced via the instructor-patched
        ``openai.ChatCompletion.create`` call (``response_model`` argument).
    """
    return openai.ChatCompletion.create(
        model="gpt-4-1106-preview",
        messages=[
            {
                "role": "user",
                "content": f"""Use the given format to extract information from the following input: {input}. """,
            },
            {
                "role": "system",
                "content": _GRAPH_EXTRACTION_SYSTEM_PROMPT,
            },
        ],
        response_model=KnowledgeGraph,
    )


# async def memory_route(self, memory_type: str):
#     @ai_classifier
#     class MemoryRoute(Enum):
#         """Represents classifer for type of memories"""
#
#         semantic_memory = "semantic_memory"
#         episodic_memory = "episodic_memory"
#
#
#     namespace = MemoryRoute(str(memory_type))
#
#     return namespace
#
# graph = generate_graph("I went to a walk in the forest in the afternoon and got information from a book.")
# # print("got here")
# #
# # print(graph)


def execute_cypher_query(query: str):
    """Run *query* against the module-level Neo4j ``graph`` and echo it.

    Args:
        query: Cypher text to execute.

    Returns:
        Whatever ``graph.query`` returns for the statement (previously the
        result was discarded and ``None`` was returned).
    """
    result = graph.query(query)
    print(query)
    return result


# Execute Cypher queries to create the user and memory components if they don't exist
#
# graph.query(
#     f"""
#     // Ensure the User node exists
#     MERGE (user:User {{ userId: {user} }})
#
#     // Ensure the SemanticMemory node exists
#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
#     MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
#
#     // Ensure the EpisodicMemory node exists
#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
#     MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
#
#     // Ensure the Buffer node exists
#     MERGE (buffer:Buffer {{ userId: {user} }})
#     MERGE (user)-[:HAS_BUFFER]->(buffer)
#     """
# )
#
# # Execute Cypher queries to create the cognitive components in the graph
# graph.query(
#     f"""
#     // Parsing the query into components and linking them to the user and memory components
#     MERGE (user:User {{ userId: {user} }})
#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
#     MERGE (buffer:Buffer {{ userId: {user} }})
#
#     CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
#     CREATE (action2:Event {{ description: 'get information', source: 'book' }})
#     CREATE (time:TimeContext {{ description: 'in the afternoon' }})
#
#     WITH user, semantic, episodic, buffer, action1, action2, time
#     CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
#     CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
#     CREATE (episodic)-[:HAS_EVENT]->(action1)
#     CREATE (episodic)-[:HAS_EVENT]->(action2)
#     CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
#     """
# )


def _escape_cypher_string(value) -> str:
    """Escape *value* for use inside a single-quoted Cypher string literal."""
    # Backslashes first, so the ones added for quotes are not doubled again.
    return str(value).replace("\\", "\\\\").replace("'", "\\'")


def create_cypher_queries_from_graph(graph: str, user_id: str):
    """Prefix *graph* with the statements that set up a user's memory nodes.

    Args:
        graph: Cypher text to run after the set-up statements. It is inserted
            verbatim, so it must already be valid Cypher.
        user_id: Identifier stored in the ``userId`` property of the User,
            SemanticMemory, EpisodicMemory and Buffer nodes. Quotes and
            backslashes are escaped so the value cannot terminate the string
            literal it is embedded in.

    Returns:
        A single Cypher string: the MERGE statements followed by *graph*.
    """
    safe_user_id = _escape_cypher_string(user_id)

    # Create the user and memory components if they don't exist
    user_memory_cypher = f"""
    MERGE (user:User {{userId: '{safe_user_id}'}})
    MERGE (semantic:SemanticMemory {{userId: '{safe_user_id}'}})
    MERGE (episodic:EpisodicMemory {{userId: '{safe_user_id}'}})
    MERGE (buffer:Buffer {{userId: '{safe_user_id}'}})
    MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
    MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
    MERGE (user)-[:HAS_BUFFER]->(buffer)
    """

    # Combine all Cypher queries
    combined_cypher_query = f"""
    {user_memory_cypher}
    {graph}
    """
    return combined_cypher_query


# NOTE(review): a class named Node and a class named Edge are defined earlier
# in this file as well; the two plain classes below rebind those module-level
# names. They are kept unchanged for backward compatibility.
class Node:
    """Plain node record holding an id, a description and a colour."""

    def __init__(self, id, description, color):
        self.id = id
        self.description = description
        self.color = color


class Edge:
    """Plain edge record holding source, target, label and colour."""

    def __init__(self, source, target, label, color):
        self.source = source
        self.target = target
        self.label = label
        self.color = color


def visualize_knowledge_graph(kg: "KnowledgeGraph"):
    """Render *kg* with Graphviz to ``knowledge_graph.gv`` and open a viewer.

    Args:
        kg: Object exposing ``nodes`` (each with ``id``, ``description``,
            ``color``) and ``edges`` (each with ``source``, ``target``,
            ``description``, ``color``).
    """
    # Imported here so the module can be imported without graphviz installed;
    # only this function needs it.
    from graphviz import Digraph

    dot = Digraph(comment="Knowledge Graph")

    # Add nodes
    for node in kg.nodes:
        dot.node(str(node.id), node.description, color=node.color)

    # Add edges
    for edge in kg.edges:
        dot.edge(
            str(edge.source),
            str(edge.target),
            label=edge.description,
            color=edge.color,
        )

    # Render the graph
    dot.render("knowledge_graph.gv", view=True)


def main():
    """Generate, print and visualise a knowledge graph for a sample sentence."""
    user_id = "User1"  # only needed by the commented-out Neo4j steps below
    query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"

    # Generate the knowledge graph from the user input.
    # A distinct local name is used so the module-level ``graph`` object that
    # execute_cypher_query() relies on is not overwritten.
    knowledge_graph: KnowledgeGraph = generate_graph(query_input)
    print(knowledge_graph.dict())
    visualize_knowledge_graph(knowledge_graph)

    # out = knowledge_graph.dict()
    # print(out)
    #
    # Translate the KnowledgeGraph into Cypher queries
    # cypher_query = create_cypher_queries_from_graph(out['graph_query'], user_id)
    # print(cypher_query)
    #
    # # Execute the Cypher queries to create the graph in Neo4j
    # execute_cypher_query(cypher_query)
    # # Refresh the graph schema
    # graph.refresh_schema()
    #
    # # Print the schema to the console
    # print(graph.schema)


# Main execution logic
if __name__ == "__main__":
    main()