diff --git a/level_4/.env.template b/level_4/.env.template
new file mode 100644
index 000000000..f3e423907
--- /dev/null
+++ b/level_4/.env.template
@@ -0,0 +1,10 @@
+OPENAI_API_KEY=sk
+WEAVIATE_URL = 
+WEAVIATE_API_KEY = 
+ENVIRONMENT = docker
+POSTGRES_USER = bla
+POSTGRES_PASSWORD = bla
+POSTGRES_DB = bubu
+POSTGRES_HOST = localhost
+POSTGRES_HOST_DOCKER = postgres
+SEGMENT_KEY = your-segment-write-key
\ No newline at end of file
diff --git a/level_4/Dockerfile b/level_4/Dockerfile
new file mode 100644
index 000000000..feb677fe3
--- /dev/null
+++ b/level_4/Dockerfile
@@ -0,0 +1,51 @@
+
+FROM python:3.11
+
+# Set build argument
+ARG API_ENABLED
+
+# Set environment variable based on the build argument
+ENV API_ENABLED=${API_ENABLED} \
+    PIP_NO_CACHE_DIR=true
+ENV PATH="${PATH}:/root/.poetry/bin"
+RUN pip install poetry
+
+WORKDIR /app
+COPY pyproject.toml poetry.lock /app/
+
+# Install the dependencies
+RUN poetry config virtualenvs.create false && \
+    poetry install --no-root --no-dev
+
+RUN apt-get update -q && \
+    apt-get install -y -q \
+        gcc \
+        python3-dev \
+        curl \
+        zip \
+        jq \
+        netcat-traditional && \
+    pip install poetry && \
+    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
+    unzip -qq awscliv2.zip && \
+    ./aws/install && \
+    apt-get clean && \
+    rm -rf \
+        awscliv2.zip \
+        /var/lib/apt/lists/* \
+        /tmp/* \
+        /var/tmp/*
+
+
+
+#RUN playwright install
+#RUN playwright install-deps
+
+WORKDIR /app
+COPY . /app
+COPY scripts/ /app
+COPY entrypoint.sh /app/entrypoint.sh
+COPY scripts/create_database.py /app/create_database.py
+RUN chmod +x /app/entrypoint.sh
+
+ENTRYPOINT ["/app/entrypoint.sh"]
\ No newline at end of file
diff --git a/level_4/docker-compose.yml b/level_4/docker-compose.yml
new file mode 100644
index 000000000..692132705
--- /dev/null
+++ b/level_4/docker-compose.yml
@@ -0,0 +1,77 @@
+version: "3.9"
+
+services:
+  neo4j:
+    image: neo4j:latest
+    container_name: neo4j
+    ports:
+      - "7474:7474"
+      - "7687:7687"
+    environment:
+      - NEO4J_AUTH=neo4j/pleaseletmein
+      - NEO4J_PLUGINS=["apoc"]
+    networks:
+      - promethai_mem_backend
+
+#  promethai_mem:
+#    networks:
+#      - promethai_mem_backend
+#    build:
+#      context: ./
+#    volumes:
+#      - "./:/app"
+#      - ./.data:/app/.data
+#
+#    environment:
+#      - HOST=0.0.0.0
+#    profiles: ["exclude-from-up"]
+#    ports:
+#      - 8000:8000
+#      - 443:443
+#      - 80:80
+#    depends_on:
+#      - postgres
+#    deploy:
+#      resources:
+#        limits:
+#          cpus: "4.0"
+#          memory: 8GB
+
+
+  postgres:
+    image: postgres
+    container_name: postgres
+    environment:
+      - POSTGRES_HOST_AUTH_METHOD=trust
+      - POSTGRES_USER=bla
+      - POSTGRES_PASSWORD=bla
+      - POSTGRES_DB=bubu
+    networks:
+      - promethai_mem_backend
+    ports:
+      - "5432:5432"
+
+#  superset:
+#    platform: linux/amd64
+#    build:
+#      context: ./superset
+#      dockerfile: Dockerfile
+#    container_name: superset
+#    environment:
+#      - ADMIN_USERNAME=admin
+#      - ADMIN_EMAIL=vasilije@topoteretes.com
+#      - ADMIN_PASSWORD=admin
+#      - POSTGRES_USER=bla
+#      - POSTGRES_PASSWORD=bla
+#      - POSTGRES_DB=bubu
+#    networks:
+#      - promethai_mem_backend
+#    ports:
+#      - '8088:8088'
+#    depends_on:
+#      - postgres
+
+networks:
+  promethai_mem_backend:
+    name: promethai_mem_backend
+
diff --git a/level_4/main.py b/level_4/main.py
new file mode 100644
index 000000000..17f599829
--- /dev/null
+++ b/level_4/main.py
@@ -0,0 +1,286 @@
+from enum import Enum
+
+import typer
+import os
+# import marvin
+# from pydantic_settings import BaseSettings
+from langchain.chains import GraphCypherQAChain
+from langchain.chat_models import ChatOpenAI
+# from marvin import ai_classifier
+# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")
+DEFAULT_PRESET = "promethai_chat"
+preset_options = [DEFAULT_PRESET]
+import questionary
+PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
+
+
+
+def create_config_dir():
+    # Create the base config dir and its standard subfolders if missing.
+    if not os.path.exists(PROMETHAI_DIR):
+        os.makedirs(PROMETHAI_DIR, exist_ok=True)
+
+    folders = ["personas", "humans", "archival", "agents"]
+    for folder in folders:
+        if not os.path.exists(os.path.join(PROMETHAI_DIR, folder)):
+            os.makedirs(os.path.join(PROMETHAI_DIR, folder))
+
+
+
+from pathlib import Path
+
+from langchain.document_loaders import TextLoader
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.graphs import Neo4jGraph
+from langchain.text_splitter import TokenTextSplitter
+from langchain.vectorstores import Neo4jVector
+import os
+from dotenv import load_dotenv
+import uuid
+
+load_dotenv()
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
+
+txt_path = "dune.txt"
+
+graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="pleaseletmein")
+
+import openai
+import instructor
+
+# Adds response_model to ChatCompletion
+# Allows the return of Pydantic model rather than raw JSON
+instructor.patch()
+from pydantic import BaseModel, Field
+from typing import List
+
+class Node(BaseModel):
+    id: int
+    description: str
+    category: str
+    color: str ="blue"
+    memory_type: str
+
+#
+# class EntityNode(BaseModel):
+#     id: int
+#     description: str
+#
+#
+# class TimeContextNode(BaseModel):
+#     id: int
+#     description: str
+#
+#
+# class ActionNode(BaseModel):
+#     id: int
+#     description: str
+
+
+class Edge(BaseModel):
+    source: int
+    target: int
+    description: str
+    color: str= "blue"
+
+
+class KnowledgeGraph(BaseModel):
+    # NOTE: `default` (the Ellipsis) and `default_factory` are mutually
+    # exclusive in pydantic; Field(..., default_factory=list) raises at
+    # class-creation time, so only the factory is passed here.
+    nodes: List[Node] = Field(default_factory=list)
+    edges: List[Edge] = Field(default_factory=list)
+
+
+#
+
+def generate_graph(input) -> KnowledgeGraph:
+    # Ask the patched ChatCompletion to return a KnowledgeGraph instance
+    # (instructor adds the response_model kwarg).
+    return openai.ChatCompletion.create(
+        model="gpt-4-1106-preview",
+        messages=[
+            {
+                "role": "user",
+                "content": f"""Use the given format to extract information from the following input: {input}. """,
+
+            },
+            { "role":"system", "content": """You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
+    - **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
+    - The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
+    ## 2. Labeling Nodes
+    - **Consistency**: Ensure you use basic or elementary types for node labels.
+    - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
+    - Include event, entity, time, or action nodes to the category.
+    - Classify the memory type as episodic or semantic.
+    - **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
+    ## 3. Handling Numerical Data and Dates
+    - Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
+    - **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
+    - **Property Format**: Properties must be in a key-value format.
+    - **Quotation Marks**: Never use escaped single or double quotes within property values.
+    - **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
+    ## 4. Coreference Resolution
+    - **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
+    If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
+    always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
+    Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
+    ## 5. Strict Compliance
+    Adhere to the rules strictly. Non-compliance will result in termination."""}
+        ],
+        response_model=KnowledgeGraph,
+    )
+
+
+
+
+# async def memory_route(self, memory_type: str):
+#     @ai_classifier
+#     class MemoryRoute(Enum):
+#         """Represents classifer for type of memories"""
+#
+#         semantic_memory = "semantic_memory"
+#         episodic_memory = "episodic_memory"
+#
+#
+#     namespace = MemoryRoute(str(memory_type))
+#
+#     return namespace
+
+#
+# graph = generate_graph("I went to a walk in the forest in the afternoon and got information from a book.")
+# # print("got here")
+# #
+# print(graph)
+
+
+def execute_cypher_query(query: str):
+    # Run the query against the module-level Neo4j connection.
+    graph.query(query)
+    # This is a placeholder for the logic that will execute the Cypher query
+    # You would replace this with the actual logic to run the query in your Neo4j database
+    print(query)
+
+#Execute Cypher queries to create the user and memory components if they don't exist
+#
+# graph.query(
+#     f"""
+#     // Ensure the User node exists
+#     MERGE (user:User {{ userId: {user} }})
+#
+#     // Ensure the SemanticMemory node exists
+#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
+#     MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
+#
+#     // Ensure the EpisodicMemory node exists
+#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
+#     MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
+#
+#     // Ensure the Buffer node exists
+#     MERGE (buffer:Buffer {{ userId: {user} }})
+#     MERGE (user)-[:HAS_BUFFER]->(buffer)
+#     """
+# )
+#
+# # Execute Cypher queries to create the cognitive components in the graph
+# graph.query(
+#     f"""
+#     // Parsing the query into components and linking them to the user and memory components
+#     MERGE (user:User {{ userId: {user} }})
+#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
+#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
+#     MERGE (buffer:Buffer {{ userId: {user} }})
+#
+#     CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
+#     CREATE (action2:Event {{ description: 'get information', source: 'book' }})
+#     CREATE (time:TimeContext {{ description: 'in the afternoon' }})
+#
+#     WITH user, semantic, episodic, buffer, action1, action2, time
+#     CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
+#     CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
+#     CREATE (episodic)-[:HAS_EVENT]->(action1)
+#     CREATE (episodic)-[:HAS_EVENT]->(action2)
+#     CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
+#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
+#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
+#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
+#     """
+# )
+
+
+def create_cypher_queries_from_graph(graph:str, user_id: str):
+    # Create nodes
+
+
+    # Create the user and memory components if they don't exist
+    user_memory_cypher = f"""
+    MERGE (user:User {{userId: '{user_id}'}})
+    MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
+    MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
+    MERGE (buffer:Buffer {{userId: '{user_id}'}})
+    MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
+    MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
+    MERGE (user)-[:HAS_BUFFER]->(buffer)
+    """
+
+    # Combine all Cypher queries
+    combined_cypher_query = f"""
+    {user_memory_cypher}
+    {graph}
+    """
+
+    return combined_cypher_query
+
+
+from graphviz import Digraph
+
+
+class GraphvizNode:  # renamed from Node: must not shadow the pydantic Node model above
+    def __init__(self, id, description, color):
+        self.id = id
+        self.description = description
+        self.color = color
+
+class GraphvizEdge:  # renamed from Edge: must not shadow the pydantic Edge model above
+    def __init__(self, source, target, label, color):
+        self.source = source
+        self.target = target
+        self.label = label
+        self.color = color
+def visualize_knowledge_graph(kg: KnowledgeGraph):
+    # Render kg to knowledge_graph.gv(.pdf) and open the viewer.
+    dot = Digraph(comment="Knowledge Graph")
+
+    # Add nodes
+    for node in kg.nodes:
+        dot.node(str(node.id), node.description, color=node.color)
+
+    # Add edges
+    for edge in kg.edges:
+        dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)
+
+    # Render the graph
+    dot.render("knowledge_graph.gv", view=True)
+
+
+# Main execution logic
+if __name__ == "__main__":
+    user_id = "User1"
+    query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"
+
+    # Generate the knowledge graph from the user input
+    # knowledge_graph = generate_graph(query_input)
+    # out = knowledge_graph.dict()
+    # print(out)
+
+    graph: KnowledgeGraph = generate_graph("I walked in the forest yesterday and added to my list I need to buy some milk in the store")
+    print(graph.dict())
+    visualize_knowledge_graph(graph)
+
+
+
+    # Translate the KnowledgeGraph into Cypher queries
+    # cypher_query = create_cypher_queries_from_graph(out['graph_query'], user_id)
+
+    # print(cypher_query)
+# #
+# # # Execute the Cypher queries to create the graph in Neo4j
+# execute_cypher_query(cypher_query)
+# # Refresh the graph schema
+# graph.refresh_schema()
+#
+# # Print the schema to the console
+# print(graph.schema)