Added docs functionality

Vasilije 2023-11-07 22:15:08 +01:00
parent 6a13072d16
commit 8d1038ae98
4 changed files with 424 additions and 0 deletions

level_4/.env.template Normal file

@@ -0,0 +1,10 @@
OPENAI_API_KEY=sk
WEAVIATE_URL=
WEAVIATE_API_KEY=
ENVIRONMENT=docker
POSTGRES_USER=bla
POSTGRES_PASSWORD=bla
POSTGRES_DB=bubu
POSTGRES_HOST=localhost
POSTGRES_HOST_DOCKER=postgres
SEGMENT_KEY=Etl4WJwzOkeDPAjaOXOMgyU16hO7mV7B
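
These values are consumed at runtime via python-dotenv (level_4/main.py below calls load_dotenv()). As a minimal sketch of how a filled-in copy of this template is read, assuming only python-dotenv:

import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory
openai_key = os.getenv("OPENAI_API_KEY", "")
postgres_host = os.getenv("POSTGRES_HOST", "localhost")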

level_4/Dockerfile Normal file

@@ -0,0 +1,51 @@
FROM python:3.11

# Build argument, surfaced to the runtime as an environment variable
ARG API_ENABLED
ENV API_ENABLED=${API_ENABLED} \
    PIP_NO_CACHE_DIR=true
# Leftover from the get-poetry installer; pip already puts poetry on PATH
ENV PATH="${PATH}:/root/.poetry/bin"

RUN pip install poetry

WORKDIR /app
COPY pyproject.toml poetry.lock /app/

# Install the dependencies into the system interpreter (no virtualenv);
# --no-dev was removed in recent Poetry releases, --only main is the equivalent
RUN poetry config virtualenvs.create false && \
    poetry install --no-root --only main

# System build tools plus the AWS CLI, cleaned up in the same layer to keep the image small
RUN apt-get update -q && \
    apt-get install -y -q \
        gcc \
        python3-dev \
        curl \
        zip \
        jq \
        netcat-traditional && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip -qq awscliv2.zip && \
    ./aws/install && \
    apt-get clean && \
    rm -rf \
        awscliv2.zip \
        /var/lib/apt/lists/* \
        /tmp/* \
        /var/tmp/*

#RUN playwright install
#RUN playwright install-deps

COPY . /app
COPY scripts/ /app
COPY entrypoint.sh /app/entrypoint.sh
COPY scripts/create_database.py /app/create_database.py
RUN chmod +x /app/entrypoint.sh
ENTRYPOINT ["/app/entrypoint.sh"]
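
scripts/create_database.py is copied into the image but is not among the four files in this commit. A minimal sketch of what such a bootstrap script might look like, assuming psycopg2 and the POSTGRES_* variables from .env.template (the body below is an illustration, not the committed script):

import os

import psycopg2
from psycopg2 import sql

# Connect to the default "postgres" database so the app database can be created if missing
conn = psycopg2.connect(
    host=os.getenv("POSTGRES_HOST_DOCKER", "postgres"),
    user=os.getenv("POSTGRES_USER", "bla"),
    password=os.getenv("POSTGRES_PASSWORD", "bla"),
    dbname="postgres",
)
conn.autocommit = True  # CREATE DATABASE cannot run inside a transaction
db_name = os.getenv("POSTGRES_DB", "bubu")
with conn.cursor() as cur:
    cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
    if cur.fetchone() is None:
        cur.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(db_name)))
conn.close()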


@@ -0,0 +1,77 @@
version: "3.9"
services:
  neo4j:
    image: neo4j:latest
    container_name: neo4j
    ports:
      - "7474:7474"
      - "7687:7687"
    environment:
      - NEO4J_AUTH=neo4j/pleaseletmein
      - NEO4J_PLUGINS=["apoc"]
    networks:
      - promethai_mem_backend

#  promethai_mem:
#    networks:
#      - promethai_mem_backend
#    build:
#      context: ./
#    volumes:
#      - "./:/app"
#      - ./.data:/app/.data
#
#    environment:
#      - HOST=0.0.0.0
#    profiles: ["exclude-from-up"]
#    ports:
#      - 8000:8000
#      - 443:443
#      - 80:80
#    depends_on:
#      - postgres
#    deploy:
#      resources:
#        limits:
#          cpus: "4.0"
#          memory: 8GB

  postgres:
    image: postgres
    container_name: postgres
    environment:
      - POSTGRES_HOST_AUTH_METHOD=trust
      - POSTGRES_USER=bla
      - POSTGRES_PASSWORD=bla
      - POSTGRES_DB=bubu
    networks:
      - promethai_mem_backend
    ports:
      - "5432:5432"

#  superset:
#    platform: linux/amd64
#    build:
#      context: ./superset
#      dockerfile: Dockerfile
#    container_name: superset
#    environment:
#      - ADMIN_USERNAME=admin
#      - ADMIN_EMAIL=vasilije@topoteretes.com
#      - ADMIN_PASSWORD=admin
#      - POSTGRES_USER=bla
#      - POSTGRES_PASSWORD=bla
#      - POSTGRES_DB=bubu
#    networks:
#      - promethai_mem_backend
#    ports:
#      - '8088:8088'
#    depends_on:
#      - postgres

networks:
  promethai_mem_backend:
    name: promethai_mem_backend
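
Once the stack is up, a quick way to confirm Neo4j is reachable with the credentials above is a one-off check (a sketch assuming the official neo4j Python driver is installed; not part of this commit):

from neo4j import GraphDatabase

# Credentials mirror NEO4J_AUTH in the compose file above
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "pleaseletmein"))
with driver.session() as session:
    print(session.run("RETURN 1 AS ok").single()["ok"])  # prints 1 when the database answers
driver.close()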

level_4/main.py Normal file

@@ -0,0 +1,286 @@
from enum import Enum
import os
import uuid
from pathlib import Path
from typing import List

import typer
import questionary
import openai
import instructor
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.graphs import Neo4jGraph
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Neo4jVector

# import marvin
# from pydantic_settings import BaseSettings
# from marvin import ai_classifier
# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")

DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]

PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")


def create_config_dir():
    if not os.path.exists(PROMETHAI_DIR):
        os.makedirs(PROMETHAI_DIR, exist_ok=True)

    folders = ["personas", "humans", "archival", "agents"]
    for folder in folders:
        if not os.path.exists(os.path.join(PROMETHAI_DIR, folder)):
            os.makedirs(os.path.join(PROMETHAI_DIR, folder))


load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

txt_path = "dune.txt"

graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="pleaseletmein")

# Patch ChatCompletion so it accepts response_model and
# returns a Pydantic model rather than raw JSON
instructor.patch()

class Node(BaseModel):
    # NOTE: the extraction prompt below asks for human-readable node IDs,
    # yet this schema still declares them as integers
    id: int
    description: str
    category: str
    color: str = "blue"
    memory_type: str


# class EntityNode(BaseModel):
#     id: int
#     description: str
#
#
# class TimeContextNode(BaseModel):
#     id: int
#     description: str
#
#
# class ActionNode(BaseModel):
#     id: int
#     description: str


class Edge(BaseModel):
    source: int
    target: int
    description: str
    color: str = "blue"


class KnowledgeGraph(BaseModel):
    # Field(..., default_factory=list) is invalid in Pydantic (a field cannot be
    # both required and carry a default), so the factory is used on its own
    nodes: List[Node] = Field(default_factory=list)
    edges: List[Edge] = Field(default_factory=list)

def generate_graph(input_text) -> KnowledgeGraph:
    return openai.ChatCompletion.create(
        model="gpt-4-1106-preview",
        messages=[
            {
                "role": "user",
                "content": f"""Use the given format to extract information from the following input: {input_text}.""",
            },
            {
                "role": "system",
                "content": """You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
  - Assign each node a category such as event, entity, time, or action.
  - Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination.""",
            },
        ],
        response_model=KnowledgeGraph,
    )
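
Because instructor patches ChatCompletion, the call above returns a validated KnowledgeGraph instance rather than raw JSON. For orientation, a hand-built example of the shape of object to expect (field values are illustrative, not actual model output):

example = KnowledgeGraph(
    nodes=[
        Node(id=1, description="Sarah", category="entity", memory_type="semantic"),
        Node(id=2, description="walk in the forest", category="action", memory_type="episodic"),
    ],
    edges=[Edge(source=1, target=2, description="performed")],
)
print(example.dict())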
# async def memory_route(self, memory_type: str):
#     @ai_classifier
#     class MemoryRoute(Enum):
#         """Represents a classifier for the type of memory"""
#
#         semantic_memory = "semantic_memory"
#         episodic_memory = "episodic_memory"
#
#     namespace = MemoryRoute(str(memory_type))
#
#     return namespace

# graph = generate_graph("I went to a walk in the forest in the afternoon and got information from a book.")
# print("got here")
#
# print(graph)
def execute_cypher_query(query: str):
    # Run the query against the Neo4j instance and echo it for debugging
    graph.query(query)
    print(query)
# Execute Cypher queries to create the user and memory components if they don't exist
#
# graph.query(
# f"""
# // Ensure the User node exists
# MERGE (user:User {{ userId: {user} }})
#
# // Ensure the SemanticMemory node exists
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
# MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
#
# // Ensure the EpisodicMemory node exists
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
# MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
#
# // Ensure the Buffer node exists
# MERGE (buffer:Buffer {{ userId: {user} }})
# MERGE (user)-[:HAS_BUFFER]->(buffer)
# """
# )
#
# # Execute Cypher queries to create the cognitive components in the graph
# graph.query(
# f"""
# // Parsing the query into components and linking them to the user and memory components
# MERGE (user:User {{ userId: {user} }})
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
# MERGE (buffer:Buffer {{ userId: {user} }})
#
# CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
# CREATE (action2:Event {{ description: 'get information', source: 'book' }})
# CREATE (time:TimeContext {{ description: 'in the afternoon' }})
#
# WITH user, semantic, episodic, buffer, action1, action2, time
# CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
# CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
# CREATE (episodic)-[:HAS_EVENT]->(action1)
# CREATE (episodic)-[:HAS_EVENT]->(action2)
# CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
# """
# )
def create_cypher_queries_from_graph(graph_cypher: str, user_id: str):
    # Create the user and memory components if they don't exist
    user_memory_cypher = f"""
    MERGE (user:User {{userId: '{user_id}'}})
    MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
    MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
    MERGE (buffer:Buffer {{userId: '{user_id}'}})
    MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
    MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
    MERGE (user)-[:HAS_BUFFER]->(buffer)
    """

    # Combine the bootstrap query with the graph-specific Cypher
    combined_cypher_query = f"""
    {user_memory_cypher}
    {graph_cypher}
    """
    return combined_cypher_query
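
Nothing in this commit yet converts a KnowledgeGraph into the graph-specific Cypher this function interpolates as graph_cypher. One possible bridge, sketched below (knowledge_graph_to_cypher is a hypothetical helper, not part of this commit, and its naive string interpolation should be replaced with query parameters in real use):

def knowledge_graph_to_cypher(kg: KnowledgeGraph) -> str:
    # One MERGE per node, then one MERGE per edge between the matched node variables
    statements = []
    for node in kg.nodes:
        statements.append(
            f"MERGE (n{node.id}:Node {{id: {node.id}, description: '{node.description}', "
            f"category: '{node.category}', memoryType: '{node.memory_type}'}})"
        )
    for edge in kg.edges:
        statements.append(
            f"MERGE (n{edge.source})-[:RELATES {{description: '{edge.description}'}}]->(n{edge.target})"
        )
    return "\n".join(statements)

The result would feed create_cypher_queries_from_graph(knowledge_graph_to_cypher(kg), user_id) and then execute_cypher_query.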
from graphviz import Digraph


# Plain helper classes for manual graph construction, renamed so they no longer
# shadow the Pydantic Node and Edge models defined above
class VizNode:
    def __init__(self, id, description, color):
        self.id = id
        self.description = description
        self.color = color


class VizEdge:
    def __init__(self, source, target, label, color):
        self.source = source
        self.target = target
        self.label = label
        self.color = color
def visualize_knowledge_graph(kg: KnowledgeGraph):
    dot = Digraph(comment="Knowledge Graph")

    # Add nodes
    for node in kg.nodes:
        dot.node(str(node.id), node.description, color=node.color)

    # Add edges
    for edge in kg.edges:
        dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)

    # Render the graph
    dot.render("knowledge_graph.gv", view=True)
# Main execution logic
if __name__ == "__main__":
    user_id = "User1"
    query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"

    # Generate the knowledge graph from the user input; the local name must not
    # shadow the module-level Neo4jGraph instance, which the commented-out
    # steps below still rely on
    knowledge_graph: KnowledgeGraph = generate_graph(query_input)
    print(knowledge_graph.dict())
    visualize_knowledge_graph(knowledge_graph)

    # Translate the KnowledgeGraph into Cypher queries
    # cypher_query = create_cypher_queries_from_graph(out['graph_query'], user_id)
    # print(cypher_query)
    #
    # # Execute the Cypher queries to create the graph in Neo4j
    # execute_cypher_query(cypher_query)

    # # Refresh the graph schema
    # graph.refresh_schema()
    #
    # # Print the schema to the console
    # print(graph.schema)