Added docs functionality

Vasilije 2023-11-07 22:15:08 +01:00
parent 6a13072d16
commit 8d1038ae98
4 changed files with 424 additions and 0 deletions

level_4/.env.template Normal file

@@ -0,0 +1,10 @@
OPENAI_API_KEY=sk
WEAVIATE_URL=
WEAVIATE_API_KEY=
ENVIRONMENT=docker
POSTGRES_USER=bla
POSTGRES_PASSWORD=bla
POSTGRES_DB=bubu
POSTGRES_HOST=localhost
POSTGRES_HOST_DOCKER=postgres
SEGMENT_KEY=Etl4WJwzOkeDPAjaOXOMgyU16hO7mV7B
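
These values are consumed at runtime via python-dotenv (level_4/main.py below calls load_dotenv()). As a minimal sketch of how a filled-in copy of this template is read, assuming only python-dotenv:

import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory
openai_key = os.getenv("OPENAI_API_KEY", "")
postgres_host = os.getenv("POSTGRES_HOST", "localhost")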

level_4/Dockerfile Normal file

@@ -0,0 +1,51 @@
FROM python:3.11

# Build argument, surfaced to the runtime as an environment variable
ARG API_ENABLED
ENV API_ENABLED=${API_ENABLED} \
    PIP_NO_CACHE_DIR=true
# Leftover from the get-poetry installer; pip already puts poetry on PATH
ENV PATH="${PATH}:/root/.poetry/bin"

RUN pip install poetry

WORKDIR /app
COPY pyproject.toml poetry.lock /app/

# Install the dependencies into the system interpreter (no virtualenv);
# --no-dev was removed in recent Poetry releases, --only main is the equivalent
RUN poetry config virtualenvs.create false && \
    poetry install --no-root --only main

# System build tools plus the AWS CLI, cleaned up in the same layer to keep the image small
RUN apt-get update -q && \
    apt-get install -y -q \
        gcc \
        python3-dev \
        curl \
        zip \
        jq \
        netcat-traditional && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip -qq awscliv2.zip && \
    ./aws/install && \
    apt-get clean && \
    rm -rf \
        awscliv2.zip \
        /var/lib/apt/lists/* \
        /tmp/* \
        /var/tmp/*

#RUN playwright install
#RUN playwright install-deps

COPY . /app
COPY scripts/ /app
COPY entrypoint.sh /app/entrypoint.sh
COPY scripts/create_database.py /app/create_database.py
RUN chmod +x /app/entrypoint.sh
ENTRYPOINT ["/app/entrypoint.sh"]
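
scripts/create_database.py is copied into the image but is not among the four files in this commit. A minimal sketch of what such a bootstrap script might look like, assuming psycopg2 and the POSTGRES_* variables from .env.template (the body below is an illustration, not the committed script):

import os

import psycopg2
from psycopg2 import sql

# Connect to the default "postgres" database so the app database can be created if missing
conn = psycopg2.connect(
    host=os.getenv("POSTGRES_HOST_DOCKER", "postgres"),
    user=os.getenv("POSTGRES_USER", "bla"),
    password=os.getenv("POSTGRES_PASSWORD", "bla"),
    dbname="postgres",
)
conn.autocommit = True  # CREATE DATABASE cannot run inside a transaction
db_name = os.getenv("POSTGRES_DB", "bubu")
with conn.cursor() as cur:
    cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
    if cur.fetchone() is None:
        cur.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(db_name)))
conn.close()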


@@ -0,0 +1,77 @@
version: "3.9"
services:
  neo4j:
    image: neo4j:latest
    container_name: neo4j
    ports:
      - "7474:7474"
      - "7687:7687"
    environment:
      - NEO4J_AUTH=neo4j/pleaseletmein
      - NEO4J_PLUGINS=["apoc"]
    networks:
      - promethai_mem_backend

#  promethai_mem:
#    networks:
#      - promethai_mem_backend
#    build:
#      context: ./
#    volumes:
#      - "./:/app"
#      - ./.data:/app/.data
#
#    environment:
#      - HOST=0.0.0.0
#    profiles: ["exclude-from-up"]
#    ports:
#      - 8000:8000
#      - 443:443
#      - 80:80
#    depends_on:
#      - postgres
#    deploy:
#      resources:
#        limits:
#          cpus: "4.0"
#          memory: 8GB

  postgres:
    image: postgres
    container_name: postgres
    environment:
      - POSTGRES_HOST_AUTH_METHOD=trust
      - POSTGRES_USER=bla
      - POSTGRES_PASSWORD=bla
      - POSTGRES_DB=bubu
    networks:
      - promethai_mem_backend
    ports:
      - "5432:5432"

#  superset:
#    platform: linux/amd64
#    build:
#      context: ./superset
#      dockerfile: Dockerfile
#    container_name: superset
#    environment:
#      - ADMIN_USERNAME=admin
#      - ADMIN_EMAIL=vasilije@topoteretes.com
#      - ADMIN_PASSWORD=admin
#      - POSTGRES_USER=bla
#      - POSTGRES_PASSWORD=bla
#      - POSTGRES_DB=bubu
#    networks:
#      - promethai_mem_backend
#    ports:
#      - '8088:8088'
#    depends_on:
#      - postgres

networks:
  promethai_mem_backend:
    name: promethai_mem_backend
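
Once the stack is up, a quick way to confirm Neo4j is reachable with the credentials above is a one-off check (a sketch assuming the official neo4j Python driver is installed; not part of this commit):

from neo4j import GraphDatabase

# Credentials mirror NEO4J_AUTH in the compose file above
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "pleaseletmein"))
with driver.session() as session:
    print(session.run("RETURN 1 AS ok").single()["ok"])  # prints 1 when the database answers
driver.close()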

level_4/main.py Normal file

@@ -0,0 +1,286 @@
from enum import Enum
import os
import uuid
from pathlib import Path
from typing import List

import typer
import questionary
import openai
import instructor
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.graphs import Neo4jGraph
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Neo4jVector

# import marvin
# from pydantic_settings import BaseSettings
# from marvin import ai_classifier
# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")

DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]

PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")


def create_config_dir():
    if not os.path.exists(PROMETHAI_DIR):
        os.makedirs(PROMETHAI_DIR, exist_ok=True)

    folders = ["personas", "humans", "archival", "agents"]
    for folder in folders:
        if not os.path.exists(os.path.join(PROMETHAI_DIR, folder)):
            os.makedirs(os.path.join(PROMETHAI_DIR, folder))


load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

txt_path = "dune.txt"

graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="pleaseletmein")

# Patch ChatCompletion so it accepts response_model and
# returns a Pydantic model rather than raw JSON
instructor.patch()

class Node(BaseModel):
    # NOTE: the extraction prompt below asks for human-readable node IDs,
    # yet this schema still declares them as integers
    id: int
    description: str
    category: str
    color: str = "blue"
    memory_type: str


# class EntityNode(BaseModel):
#     id: int
#     description: str
#
#
# class TimeContextNode(BaseModel):
#     id: int
#     description: str
#
#
# class ActionNode(BaseModel):
#     id: int
#     description: str


class Edge(BaseModel):
    source: int
    target: int
    description: str
    color: str = "blue"


class KnowledgeGraph(BaseModel):
    # Field(..., default_factory=list) is invalid in Pydantic (a field cannot be
    # both required and carry a default), so the factory is used on its own
    nodes: List[Node] = Field(default_factory=list)
    edges: List[Edge] = Field(default_factory=list)

def generate_graph(input_text) -> KnowledgeGraph:
    return openai.ChatCompletion.create(
        model="gpt-4-1106-preview",
        messages=[
            {
                "role": "user",
                "content": f"""Use the given format to extract information from the following input: {input_text}.""",
            },
            {
                "role": "system",
                "content": """You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
  - Assign each node a category such as event, entity, time, or action.
  - Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination.""",
            },
        ],
        response_model=KnowledgeGraph,
    )
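
Because instructor patches ChatCompletion, the call above returns a validated KnowledgeGraph instance rather than raw JSON. For orientation, a hand-built example of the shape of object to expect (field values are illustrative, not actual model output):

example = KnowledgeGraph(
    nodes=[
        Node(id=1, description="Sarah", category="entity", memory_type="semantic"),
        Node(id=2, description="walk in the forest", category="action", memory_type="episodic"),
    ],
    edges=[Edge(source=1, target=2, description="performed")],
)
print(example.dict())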
# async def memory_route(self, memory_type: str):
#     @ai_classifier
#     class MemoryRoute(Enum):
#         """Represents a classifier for the type of memory"""
#
#         semantic_memory = "semantic_memory"
#         episodic_memory = "episodic_memory"
#
#     namespace = MemoryRoute(str(memory_type))
#
#     return namespace

# graph = generate_graph("I went to a walk in the forest in the afternoon and got information from a book.")
# print("got here")
#
# print(graph)
def execute_cypher_query(query: str):
    # Run the query against the Neo4j instance and echo it for debugging
    graph.query(query)
    print(query)
# Execute Cypher queries to create the user and memory components if they don't exist
#
# graph.query(
# f"""
# // Ensure the User node exists
# MERGE (user:User {{ userId: {user} }})
#
# // Ensure the SemanticMemory node exists
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
# MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
#
# // Ensure the EpisodicMemory node exists
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
# MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
#
# // Ensure the Buffer node exists
# MERGE (buffer:Buffer {{ userId: {user} }})
# MERGE (user)-[:HAS_BUFFER]->(buffer)
# """
# )
#
# # Execute Cypher queries to create the cognitive components in the graph
# graph.query(
# f"""
# // Parsing the query into components and linking them to the user and memory components
# MERGE (user:User {{ userId: {user} }})
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
# MERGE (buffer:Buffer {{ userId: {user} }})
#
# CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
# CREATE (action2:Event {{ description: 'get information', source: 'book' }})
# CREATE (time:TimeContext {{ description: 'in the afternoon' }})
#
# WITH user, semantic, episodic, buffer, action1, action2, time
# CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
# CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
# CREATE (episodic)-[:HAS_EVENT]->(action1)
# CREATE (episodic)-[:HAS_EVENT]->(action2)
# CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
# """
# )
def create_cypher_queries_from_graph(graph_cypher: str, user_id: str):
    # Create the user and memory components if they don't exist
    user_memory_cypher = f"""
    MERGE (user:User {{userId: '{user_id}'}})
    MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
    MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
    MERGE (buffer:Buffer {{userId: '{user_id}'}})
    MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
    MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
    MERGE (user)-[:HAS_BUFFER]->(buffer)
    """

    # Combine the bootstrap query with the graph-specific Cypher
    combined_cypher_query = f"""
    {user_memory_cypher}
    {graph_cypher}
    """
    return combined_cypher_query
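
Nothing in this commit yet converts a KnowledgeGraph into the graph-specific Cypher this function interpolates as graph_cypher. One possible bridge, sketched below (knowledge_graph_to_cypher is a hypothetical helper, not part of this commit, and its naive string interpolation should be replaced with query parameters in real use):

def knowledge_graph_to_cypher(kg: KnowledgeGraph) -> str:
    # One MERGE per node, then one MERGE per edge between the matched node variables
    statements = []
    for node in kg.nodes:
        statements.append(
            f"MERGE (n{node.id}:Node {{id: {node.id}, description: '{node.description}', "
            f"category: '{node.category}', memoryType: '{node.memory_type}'}})"
        )
    for edge in kg.edges:
        statements.append(
            f"MERGE (n{edge.source})-[:RELATES {{description: '{edge.description}'}}]->(n{edge.target})"
        )
    return "\n".join(statements)

The result would feed create_cypher_queries_from_graph(knowledge_graph_to_cypher(kg), user_id) and then execute_cypher_query.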
from graphviz import Digraph


# Plain helper classes for manual graph construction, renamed so they no longer
# shadow the Pydantic Node and Edge models defined above
class VizNode:
    def __init__(self, id, description, color):
        self.id = id
        self.description = description
        self.color = color


class VizEdge:
    def __init__(self, source, target, label, color):
        self.source = source
        self.target = target
        self.label = label
        self.color = color
def visualize_knowledge_graph(kg: KnowledgeGraph):
    dot = Digraph(comment="Knowledge Graph")

    # Add nodes
    for node in kg.nodes:
        dot.node(str(node.id), node.description, color=node.color)

    # Add edges
    for edge in kg.edges:
        dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)

    # Render the graph
    dot.render("knowledge_graph.gv", view=True)
# Main execution logic
if __name__ == "__main__":
    user_id = "User1"
    query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"

    # Generate the knowledge graph from the user input; the local name must not
    # shadow the module-level Neo4jGraph instance, which the commented-out
    # steps below still rely on
    knowledge_graph: KnowledgeGraph = generate_graph(query_input)
    print(knowledge_graph.dict())
    visualize_knowledge_graph(knowledge_graph)

    # Translate the KnowledgeGraph into Cypher queries
    # cypher_query = create_cypher_queries_from_graph(out['graph_query'], user_id)
    # print(cypher_query)
    #
    # # Execute the Cypher queries to create the graph in Neo4j
    # execute_cypher_query(cypher_query)

    # # Refresh the graph schema
    # graph.refresh_schema()
    #
    # # Print the schema to the console
    # print(graph.schema)