Added dynamic graph creation
This commit is contained in:
parent 0b6f9b0dca
commit ceba0d31e7
23 changed files with 984 additions and 445 deletions
102 README.md

@@ -149,110 +149,8 @@ Run
### Run the level 3

Make sure you have Docker, Poetry, Python 3.11, and Postgres installed.

Copy the .env.template to .env and fill in the variables.

Two ways to run the level 3:

#### Docker:

Copy the .env.template to .env and fill in the variables.
Set the environment variable in the .env file to "docker".

Launch the docker image:

```docker compose up promethai_mem```

Send the request to the API:

```
curl -X POST -H "Content-Type: application/json" -d '{
  "payload": {
    "user_id": "97980cfea0067",
    "data": [".data/3ZCCCW.pdf"],
    "test_set": "sample",
    "params": ["chunk_size"],
    "metadata": "sample",
    "retriever_type": "single_document_context"
  }
}' http://0.0.0.0:8000/rag-test/rag_test_run
```

Params:

- data -> list of URLs or a path to a file located in the .data folder (pdf, docx, txt, html)
- test_set -> sample or manual (a list of questions and answers)
- metadata -> sample, manual (json), or version (in progress)
- params -> chunk_size, chunk_overlap, search_type (hybrid, bm25), embeddings
- retriever_type -> llm_context, single_document_context, multi_document_context, cognitive_architecture (coming soon)
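For a manual run, the test set is just a list of questions and answers. A minimal sketch of producing such a file (the field names here are an assumption for illustration; the authoritative example lives in example_data/test_set.json):

```python
# Hypothetical field names; see example_data/test_set.json for the real schema.
import json

manual_test_set = [
    {"question": "Who is the main character of Dune?", "answer": "Paul Atreides"},
    {"question": "What substance is mined on Arrakis?", "answer": "the spice melange"},
]

with open("example_data/my_test_set.json", "w") as f:
    json.dump(manual_test_set, f, indent=2)
```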
Inspect the results in the DB:

```docker exec -it postgres psql -U bla```

```\c bubu```

```select * from test_outputs;```

Or set up Superset to visualize the results.
The base SQL query is in the example_data folder.

#### Poetry environment:

Copy the .env.template to .env and fill in the variables.
Set the environment variable in the .env file to "local".

Use the poetry environment:

```poetry shell```

Launch the postgres DB:

```docker compose up postgres```

Launch Superset:

```docker compose up superset```

Open Superset in your browser:

```http://localhost:8088```

Add the Postgres datasource to Superset with the following connection string:

```postgres://bla:bla@postgres:5432/bubu```

Make sure to run the following to initialize the DB tables:

```python scripts/create_database.py```

After that, you can run the RAG test manager from your command line:

```
python rag_test_manager.py \
  --file ".data" \
  --test_set "example_data/test_set.json" \
  --user_id "97980cfea0067" \
  --params "chunk_size" "search_type" \
  --metadata "example_data/metadata.json" \
  --retriever_type "single_document_context"
```

Examples of the metadata structure and test set are in the "example_data" folder.
@@ -7,4 +7,8 @@ POSTGRES_PASSWORD = bla
 POSTGRES_DB = bubu
 POSTGRES_HOST = localhost
 POSTGRES_HOST_DOCKER = postgres
 SEGMENT_KEY = Etl4WJwzOkeDPAjaOXOMgyU16hO7mV7B
+COG_ARCH_DIR = cognitive_architecture
+GRAPH_DB_URL =
+GRAPH_DB_PW =
+GRAPH_DB_USER =
0 level_4/cognitive_architecture/classifiers/__init__.py Normal file

15 level_4/cognitive_architecture/classifiers/classifier.py Normal file

@@ -0,0 +1,15 @@
# TODO: add all classifiers here


# classify retrievals according to type of retrieval
def classify_retrieval():
    pass


# classify documents according to type of document
def classify_call():
    pass
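As a sketch of where these stubs might be headed, one of them could dispatch on the retriever types listed in the README (the labels come from there; the heuristic below is a hypothetical illustration, not the project's code):

```python
# Hypothetical illustration only: routes a query to one of the retriever
# types named in the README. The real classifiers are still TODO.
def classify_retrieval_sketch(document_count: int) -> str:
    """Naive heuristic: choose a retriever type from the document count."""
    if document_count == 0:
        return "llm_context"
    if document_count == 1:
        return "single_document_context"
    return "multi_document_context"
```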
84 level_4/cognitive_architecture/config.py Normal file

@@ -0,0 +1,84 @@
import os
import json
import configparser
import uuid
from typing import Optional, List, Dict, Any
from dataclasses import dataclass, field
from pathlib import Path
from dotenv import load_dotenv

base_dir = Path(__file__).resolve().parent.parent
# Load the .env file from the base directory
dotenv_path = base_dir / '.env'
load_dotenv(dotenv_path=dotenv_path)


@dataclass
class Config:
    # Paths and Directories
    memgpt_dir: str = field(default_factory=lambda: os.getenv('COG_ARCH_DIR', 'cognitive_architecture'))
    config_path: str = field(default_factory=lambda: os.path.join(os.getenv('COG_ARCH_DIR', 'cognitive_architecture'), 'config'))

    # Model parameters
    model: str = 'gpt-4-1106-preview'
    model_endpoint: str = 'openai'
    openai_key: Optional[str] = os.getenv('OPENAI_API_KEY')

    # Embedding parameters
    embedding_model: str = 'openai'
    embedding_dim: int = 1536
    embedding_chunk_size: int = 300

    # Database parameters
    graph_database_url: str = os.getenv('GRAPH_DB_URL')
    graph_database_username: str = os.getenv('GRAPH_DB_USER')
    graph_database_password: str = os.getenv('GRAPH_DB_PW')

    # Client ID
    anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)

    def load(self):
        """Loads the configuration from a file or environment variables."""
        config = configparser.ConfigParser()
        config.read(self.config_path)

        # Override with environment variables if they exist
        for attr in self.__annotations__:
            env_value = os.getenv(attr.upper())
            if env_value is not None:
                setattr(self, attr, env_value)

        # Load from config file
        if config.sections():
            for section in config.sections():
                for key, value in config.items(section):
                    if hasattr(self, key):
                        setattr(self, key, value)

    def save(self):
        """Saves the current configuration to a file."""
        config = configparser.ConfigParser()

        # Save the current settings to the config file
        for attr, value in self.__dict__.items():
            # Guard attribute names without an underscore; a bare split
            # would raise ValueError when unpacking below
            section, option = attr.split('_', 1) if '_' in attr else ('general', attr)
            if not config.has_section(section):
                config.add_section(section)
            config.set(section, option, str(value))

        with open(self.config_path, 'w') as configfile:
            config.write(configfile)

    def to_dict(self) -> Dict[str, Any]:
        """Returns a dictionary representation of the configuration."""
        return {attr: getattr(self, attr) for attr in self.__annotations__}

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":
        """Creates a Config instance from a dictionary."""
        config = cls()
        for attr, value in config_dict.items():
            if hasattr(config, attr):
                setattr(config, attr, value)
        return config
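A quick sketch of how this class is meant to be used, assuming the GRAPH_DB_* variables from the .env shown above are set:

```python
# Sketch: load settings from .env / config file, round-trip through a dict.
from cognitive_architecture.config import Config

config = Config()
config.load()  # env vars such as GRAPH_DB_URL override the defaults

settings = config.to_dict()
restored = Config.from_dict(settings)
assert restored.model == config.model
```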
@@ -23,4 +23,18 @@ async def session_scope(session)
async def add_entity(session, entity):
    async with session_scope(session) as s:  # use the async session_scope
        s.add(entity)  # no need to commit; session_scope takes care of it
        return "Successfully added entity"


async def update_entity(session, model, entity_id, new_value):
    async with session_scope(session) as s:
        # Retrieve the entity from the database
        entity = await s.get(model, entity_id)

        if entity:
            # Update the relevant column; 'updated_at' is updated automatically
            entity.operation_status = new_value
            return "Successfully updated entity"
        else:
            return "Entity not found"
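A sketch of calling these helpers with the project's async session factory (AsyncSessionLocal is imported elsewhere in this commit from cognitive_architecture.database.database; the Operation constructor arguments are an assumption for illustration):

```python
# Sketch only: exercise add_entity/update_entity with the async session factory.
import asyncio
from cognitive_architecture.database.database import AsyncSessionLocal
from cognitive_architecture.database.database_crud import add_entity, update_entity
from cognitive_architecture.models.operation import Operation

async def demo():
    async with AsyncSessionLocal() as session:
        # Field names below are illustrative; only operation_status is
        # referenced by update_entity itself.
        op = Operation(user_id="97980cfea0067", operation_status="PENDING")
        print(await add_entity(session, op))
        # op.id is populated once session_scope has committed
        print(await update_entity(session, Operation, op.id, "DONE"))

asyncio.run(demo())
```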
588 level_4/cognitive_architecture/graph_database/graph.py Normal file

@@ -0,0 +1,588 @@
from pydantic import BaseModel, Field
from enum import Enum
from typing import List

import typer
import os
import uuid
import logging
# import marvin
# from pydantic_settings import BaseSettings
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
# from marvin import ai_classifier
# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")

print(os.getcwd())

from ..models.sessions import Session
from ..models.testset import TestSet
from ..models.testoutput import TestOutput
from ..models.metadatas import MetaDatas
from ..models.operation import Operation
from ..models.docs import DocsModel
from ..models.memory import MemoryModel

from pathlib import Path
import networkx as nx

from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.graphs import Neo4jGraph
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Neo4jVector
from dotenv import load_dotenv

from graphviz import Digraph

from ..database.database_crud import session_scope
from ..database.database import AsyncSessionLocal

import openai
import instructor

from abc import ABC, abstractmethod

# Adds response_model to ChatCompletion
# Allows the return of a Pydantic model rather than raw JSON
instructor.patch()

from ..utils import format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, create_node_variable_mapping

DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]
import questionary
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
from ..config import Config

config = Config()
config.load()

print(config.model)
print(config.openai_key)


# Execute Cypher queries to create the user and memory components if they don't exist
#
# graph.query(
#     f"""
#     // Ensure the User node exists
#     MERGE (user:User {{ userId: {user} }})
#
#     // Ensure the SemanticMemory node exists
#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
#     MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
#
#     // Ensure the EpisodicMemory node exists
#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
#     MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
#
#     // Ensure the Buffer node exists
#     MERGE (buffer:Buffer {{ userId: {user} }})
#     MERGE (user)-[:HAS_BUFFER]->(buffer)
#     """
# )
#
# # Execute Cypher queries to create the cognitive components in the graph
# graph.query(
#     f"""
#     // Parsing the query into components and linking them to the user and memory components
#     MERGE (user:User {{ userId: {user} }})
#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
#     MERGE (buffer:Buffer {{ userId: {user} }})
#
#     CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
#     CREATE (action2:Event {{ description: 'get information', source: 'book' }})
#     CREATE (time:TimeContext {{ description: 'in the afternoon' }})
#
#     WITH user, semantic, episodic, buffer, action1, action2, time
#     CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
#     CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
#     CREATE (episodic)-[:HAS_EVENT]->(action1)
#     CREATE (episodic)-[:HAS_EVENT]->(action2)
#     CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
#     """
# )


class Node(BaseModel):
    id: int
    description: str
    category: str
    color: str = "blue"
    memory_type: str


class Edge(BaseModel):
    source: int
    target: int
    description: str
    color: str = "blue"


class KnowledgeGraph(BaseModel):
    nodes: List[Node] = Field(default_factory=list)
    edges: List[Edge] = Field(default_factory=list)


# Shared system prompt; the same text is used by generate_graph below and by
# Neo4jGraphDB.user_query_to_edges_and_nodes
KNOWLEDGE_GRAPH_SYSTEM_PROMPT = """You are a top-tier algorithm
designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person,
    always label it as **"person"**.
    Avoid using more specific terms like "mathematician" or "scientist".
  - Include event, entity, time, or action nodes in the category.
  - Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs.
  Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information,
  should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**:
  Do not create separate nodes for dates or numerical values.
  Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**:
  When extracting entities, it's vital to ensure consistency.
  If an entity, such as "John Doe", is mentioned multiple times
  in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
  always use the most complete identifier for that entity throughout the knowledge graph.
  In this example, use "John Doe" as the entity ID.
  Remember, the knowledge graph should be coherent and easily understandable,
  so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination."""


def generate_graph(input) -> KnowledgeGraph:
    return openai.ChatCompletion.create(
        model="gpt-4-1106-preview",
        messages=[
            {
                "role": "user",
                "content": f"""Use the given format to extract information from the following input: {input}. """,
            },
            {"role": "system", "content": KNOWLEDGE_GRAPH_SYSTEM_PROMPT},
        ],
        response_model=KnowledgeGraph,
    )
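Because instructor.patch() adds response_model support to ChatCompletion, the call above returns a validated KnowledgeGraph instance rather than raw JSON. A sketch of what that buys, assuming OPENAI_API_KEY is set:

```python
# Sketch: generate_graph returns a KnowledgeGraph, so fields are typed.
kg = generate_graph("I walked in the forest yesterday and bought milk")
for node in kg.nodes:
    print(node.id, node.category, node.memory_type, node.description)
for edge in kg.edges:
    print(edge.source, "->", edge.target, edge.description)
```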
class AbstractGraphDB(ABC):

    @abstractmethod
    def query(self, query: str, params=None):
        pass

    # @abstractmethod
    # def create_nodes(self, nodes: List[dict]):
    #     pass
    #
    # @abstractmethod
    # def create_edges(self, edges: List[dict]):
    #     pass
    #
    # @abstractmethod
    # def create_memory_type_relationships(self, nodes: List[dict], memory_type: str):
    #     pass


class Neo4jGraphDB(AbstractGraphDB):
    def __init__(self, url, username, password):
        # Initialize the Neo4j connection here
        self.graph = Neo4jGraph(url=url, username=username, password=password)
        self.openai_key = config.openai_key

    def query(self, query, params=None):
        return self.graph.query(query, params)

    def create_base_cognitive_architecture(self, user_id: str):
        # Create the user and memory components if they don't exist
        user_memory_cypher = f"""
        MERGE (user:User {{userId: '{user_id}'}})
        MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
        MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
        MERGE (buffer:Buffer {{userId: '{user_id}'}})
        MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
        MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
        MERGE (user)-[:HAS_BUFFER]->(buffer)
        """
        return user_memory_cypher

    def user_query_to_edges_and_nodes(self, input: str) -> KnowledgeGraph:
        # Reuses the same extraction prompt as generate_graph above
        return openai.ChatCompletion.create(
            model=config.model,
            messages=[
                {
                    "role": "user",
                    "content": f"""Use the given format to extract information from the following input: {input}. """,
                },
                {"role": "system", "content": KNOWLEDGE_GRAPH_SYSTEM_PROMPT},
            ],
            response_model=KnowledgeGraph,
        )

    def generate_create_statements_for_nodes_with_uuid(self, nodes, unique_mapping, base_node_mapping):
        create_statements = []
        for node in nodes:
            original_variable_name = base_node_mapping[node['id']]
            unique_variable_name = unique_mapping[original_variable_name]
            node_label = node['category'].capitalize()
            properties = {k: v for k, v in node.items() if k not in ['id', 'category']}
            try:
                properties = format_dict(properties)
            except Exception:
                pass
            create_statements.append(f"CREATE ({unique_variable_name}:{node_label} {properties})")
        return create_statements

    # Generate Cypher CREATE statements for edges with unique variable names
    def generate_create_statements_for_edges_with_uuid(self, edges, unique_mapping, base_node_mapping):
        create_statements = []
        with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
        create_statements.append(with_statement)

        for edge in edges:
            source_variable = unique_mapping[base_node_mapping[edge['source']]]
            target_variable = unique_mapping[base_node_mapping[edge['target']]]
            relationship = edge['description'].replace(" ", "_").upper()
            create_statements.append(f"CREATE ({source_variable})-[:{relationship}]->({target_variable})")
        return create_statements

    def generate_memory_type_relationships_with_uuid_and_time_context(self, nodes, unique_mapping, base_node_mapping):
        create_statements = []
        with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
        create_statements.append(with_statement)

        # Loop through each node and create relationships based on memory_type
        for node in nodes:
            original_variable_name = base_node_mapping[node['id']]
            unique_variable_name = unique_mapping[original_variable_name]
            if node['memory_type'] == 'semantic':
                create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_variable_name})")
            elif node['memory_type'] == 'episodic':
                create_statements.append(f"CREATE (episodic)-[:HAS_EVENT]->({unique_variable_name})")
                if node['category'] == 'time':
                    create_statements.append(f"CREATE (buffer)-[:HAS_TIME_CONTEXT]->({unique_variable_name})")

            # Assuming the buffer holds all actions and times
            create_statements.append(f"CREATE (buffer)-[:CURRENTLY_HOLDING]->({unique_variable_name})")

        return create_statements
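To make the output of these three generators concrete, a hand-worked sketch for a single semantic node (the UUID suffix is abbreviated for readability):

```python
# Sketch: what the generators emit for one node, given a mapping such as
# {'person1': 'person1_ab12...'} from the helpers in ..utils.
nodes = [{'id': 1, 'category': 'person', 'description': 'John Doe',
          'color': 'blue', 'memory_type': 'semantic'}]
# generate_create_statements_for_nodes_with_uuid produces e.g.:
#   CREATE (person1_ab12:Person {description: 'John Doe', color: 'blue', memory_type: 'semantic'})
# and generate_memory_type_relationships_with_uuid_and_time_context adds:
#   WITH person1_ab12, user, semantic, episodic, buffer
#   CREATE (semantic)-[:HAS_KNOWLEDGE]->(person1_ab12)
#   CREATE (buffer)-[:CURRENTLY_HOLDING]->(person1_ab12)
```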
    def generate_cypher_query_for_user_prompt_decomposition(self, user_id):
        graph: KnowledgeGraph = generate_graph("I walked in the forest yesterday and added to my list I need to buy some milk in the store")
        graph_dic = graph.dict()

        node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
        edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
        # Create unique variable names for each node
        unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
        unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)
        create_nodes_statements = self.generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping, node_variable_mapping)
        create_edges_statements = self.generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping, node_variable_mapping)

        memory_type_statements_with_uuid_and_time_context = self.generate_memory_type_relationships_with_uuid_and_time_context(
            graph_dic['nodes'], unique_node_variable_mapping, node_variable_mapping)

        # Combine all statements
        cypher_statements = [self.create_base_cognitive_architecture(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
        cypher_statements_joined = "\n".join(cypher_statements)
        return cypher_statements_joined

    def update_user_query_for_user_prompt_decomposition(self, user_id, user_query):
        pass

    def delete_all_user_memories(self, user_id):
        try:
            # Check if the user exists
            user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
            if not user_exists:
                return f"No user found with ID: {user_id}"

            # Delete all memory nodes and relationships for the given user.
            # The memory nodes are matched before anything is deleted;
            # removing the user's relationships first would leave the memory
            # nodes unreachable by the MATCH clauses.
            delete_query = f"""
            MATCH (user:User {{userId: '{user_id}'}})
            OPTIONAL MATCH (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
            OPTIONAL MATCH (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
            OPTIONAL MATCH (user)-[:HAS_BUFFER]->(buffer)
            DETACH DELETE semantic, episodic, buffer
            """
            self.graph.query(delete_query)
            return f"All memories deleted for user ID: {user_id}"
        except Exception as e:
            return f"An error occurred: {str(e)}"

    def delete_specific_memory_type(self, user_id, memory_type):
        try:
            # Check if the user exists
            user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
            if not user_exists:
                return f"No user found with ID: {user_id}"

            # Validate the memory type and map it to the relationship used at
            # creation time (memory_type.upper() alone would yield e.g.
            # HAS_SEMANTICMEMORY, which never matches HAS_SEMANTIC_MEMORY)
            relationships = {
                'SemanticMemory': 'HAS_SEMANTIC_MEMORY',
                'EpisodicMemory': 'HAS_EPISODIC_MEMORY',
                'Buffer': 'HAS_BUFFER',
            }
            if memory_type not in relationships:
                return "Invalid memory type. Choose from 'SemanticMemory', 'EpisodicMemory', or 'Buffer'."

            # Delete the specific memory type nodes and relationships for the given user
            delete_query = f"""
            MATCH (user:User {{userId: '{user_id}'}})-[:{relationships[memory_type]}]->(memory)
            DETACH DELETE memory
            """
            self.graph.query(delete_query)
            return f"{memory_type} deleted for user ID: {user_id}"
        except Exception as e:
            return f"An error occurred: {str(e)}"

    def retrieve_semantic_memory(self, user_id: str):
        query = """
        MATCH (user:User {userId: $user_id})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)
        MATCH (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
        RETURN knowledge
        """
        return self.query(query, params={"user_id": user_id})

    def retrieve_episodic_memory(self, user_id: str):
        query = """
        MATCH (user:User {userId: $user_id})-[:HAS_EPISODIC_MEMORY]->(episodic:EpisodicMemory)
        MATCH (episodic)-[:HAS_EVENT]->(event)
        RETURN event
        """
        return self.query(query, params={"user_id": user_id})

    def retrieve_buffer_memory(self, user_id: str):
        query = """
        MATCH (user:User {userId: $user_id})-[:HAS_BUFFER]->(buffer:Buffer)
        MATCH (buffer)-[:CURRENTLY_HOLDING]->(item)
        RETURN item
        """
        return self.query(query, params={"user_id": user_id})

    def generate_graph_semantic_memory_document_summary(self, document_summary: str, unique_graphdb_mapping_values: dict, document_namespace: str):
        """This function takes a document and generates a document summary in Semantic Memory (work in progress)."""
        create_statements = []
        with_statement = f"WITH {', '.join(unique_graphdb_mapping_values.values())}, user, semantic, episodic, buffer"
        create_statements.append(with_statement)

        # Loop through each node and create relationships based on memory_type
        create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_graphdb_mapping_values})")

        return create_statements

    def generate_document_summary(self, document_summary: str, unique_graphdb_mapping_values: dict, document_namespace: str):
        """This function takes a document and generates a document summary in Semantic Memory (work in progress)."""
        # fetch namespace from postgres db
        # fetch 1st and last page from vector store
        # summarize the text, add document type
        # write to postgres
        create_statements = []
        with_statement = f"WITH {', '.join(unique_graphdb_mapping_values.values())}, user, semantic, episodic, buffer"
        create_statements.append(with_statement)

        # Loop through each node and create relationships based on memory_type
        create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_graphdb_mapping_values})")

        return create_statements

    def create_document_node_cypher(self, document_summary: dict, user_id: str) -> str:
        """
        Generate a Cypher query to create a Document node linked to a SemanticMemory node for a user.

        Parameters:
        - document_summary (dict): A dictionary containing the document's category, title, and summary.
        - user_id (str): The unique identifier for the user.

        Returns:
        - str: A Cypher query string with parameters.

        Raises:
        - ValueError: If any required data is missing or invalid.
        """

        # Validate the input parameters
        if not isinstance(document_summary, dict):
            raise ValueError("The document_summary must be a dictionary.")
        if not all(key in document_summary for key in ['document_category', 'title', 'summary']):
            raise ValueError("The document_summary dictionary is missing required keys.")
        if not isinstance(user_id, str) or not user_id:
            raise ValueError("The user_id must be a non-empty string.")

        # Escape single quotes in the document summary data (if not using parameters)
        # title = document_summary['title'].replace("'", "\\'")
        # summary = document_summary['summary'].replace("'", "\\'")
        # document_category = document_summary['document_category'].replace("'", "\\'")

        # Generate the Cypher query using parameters. NOTE: the caller must
        # supply $user_id, $title, $summary and $document_category when
        # executing this query.
        cypher_query = f'''
        // Ensure the User node exists
        MERGE (user:User {{ userId: $user_id }})

        // Ensure the SemanticMemory node exists and is connected to the User
        MERGE (semantic:SemanticMemory {{ userId: $user_id }})
        MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)

        // Create the Document node with its properties
        CREATE (document:Document {{
            title: $title,
            summary: $summary,
            documentCategory: $document_category
        }})

        // Link the Document node to the SemanticMemory node
        CREATE (semantic)-[:HAS_DOCUMENT]->(document)
        '''

        return cypher_query

    def update_document_node_with_namespace(self, user_id: str, vectordb_namespace: str):
        # Generate the Cypher query
        cypher_query = f'''
        MATCH (user:User {{userId: $user_id}})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
        SET document.vectordbNamespace = $vectordb_namespace
        RETURN document
        '''

        # Parameters for the query
        parameters = {
            'user_id': user_id,
            'vectordb_namespace': vectordb_namespace
        }

        # Execute the query with the provided parameters
        result = self.query(cypher_query, parameters)

        return result


class NetworkXGraphDB(AbstractGraphDB):
    def __init__(self):
        self.graph = nx.Graph()
        # Initialize other necessary properties or configurations

    def query(self, query: str, params=None):
        # NetworkX has no query language; traversal happens in the
        # retrieve_* methods below
        raise NotImplementedError("NetworkXGraphDB does not support Cypher queries")

    def create_base_cognitive_architecture(self, user_id: str):
        # Add nodes for the user and memory types if they don't exist
        self.graph.add_node(user_id, type='User')
        self.graph.add_node(f"{user_id}_semantic", type='SemanticMemory')
        self.graph.add_node(f"{user_id}_episodic", type='EpisodicMemory')
        self.graph.add_node(f"{user_id}_buffer", type='Buffer')

        # Add edges to connect the user to the memory types
        self.graph.add_edge(user_id, f"{user_id}_semantic", relation='HAS_SEMANTIC_MEMORY')
        self.graph.add_edge(user_id, f"{user_id}_episodic", relation='HAS_EPISODIC_MEMORY')
        self.graph.add_edge(user_id, f"{user_id}_buffer", relation='HAS_BUFFER')

    def delete_all_user_memories(self, user_id: str):
        # Remove nodes and edges related to the user's memories
        for memory_type in ['semantic', 'episodic', 'buffer']:
            memory_node = f"{user_id}_{memory_type}"
            if memory_node in self.graph:
                self.graph.remove_node(memory_node)

    def delete_specific_memory_type(self, user_id: str, memory_type: str):
        # Remove a specific type of memory node and its related edges
        memory_node = f"{user_id}_{memory_type.lower()}"
        if memory_node in self.graph:
            self.graph.remove_node(memory_node)

    # Methods for retrieving semantic, episodic, and buffer memories
    def retrieve_semantic_memory(self, user_id: str):
        return [n for n in self.graph.neighbors(f"{user_id}_semantic")]

    def retrieve_episodic_memory(self, user_id: str):
        return [n for n in self.graph.neighbors(f"{user_id}_episodic")]

    def retrieve_buffer_memory(self, user_id: str):
        return [n for n in self.graph.neighbors(f"{user_id}_buffer")]


class GraphDBFactory:
    def create_graph_db(self, db_type, **kwargs):
        if db_type == 'neo4j':
            return Neo4jGraphDB(**kwargs)
        elif db_type == 'networkx':
            return NetworkXGraphDB(**kwargs)
        else:
            raise ValueError(f"Unsupported database type: {db_type}")
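A sketch of how these classes are meant to be wired together, assuming the GRAPH_DB_* variables from the .env shown earlier are filled in:

```python
# Sketch: build a backend via the factory and persist one decomposed prompt.
factory = GraphDBFactory()
neo4j_db = factory.create_graph_db(
    'neo4j',
    url=config.graph_database_url,
    username=config.graph_database_username,
    password=config.graph_database_password,
)
cypher = neo4j_db.generate_cypher_query_for_user_prompt_decomposition("user123")
neo4j_db.query(cypher)

# The NetworkX backend mirrors the same retrieval API in memory:
nx_db = factory.create_graph_db('networkx')
nx_db.create_base_cognitive_architecture("user123")
print(nx_db.retrieve_semantic_memory("user123"))
```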
@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 class DocsModel(Base):
     __tablename__ = 'docs'

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base

 class MemoryModel(Base):
     __tablename__ = 'memories'

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class MetaDatas(Base):

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class Operation(Base):

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class Session(Base):

@@ -14,7 +14,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class TestOutput(Base):

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class TestSet(Base):

@@ -6,7 +6,7 @@ from sqlalchemy.ext.declarative import declarative_base
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class User(Base):
10 level_4/cognitive_architecture/presets.py Normal file

@@ -0,0 +1,10 @@
DEFAULT_PRESET = "cognitive_architecture_chat"
preset_options = [DEFAULT_PRESET]


def use_preset():
    """Placeholder for different preset options"""
    pass
0 level_4/cognitive_architecture/shared/__init__.py Normal file

7 level_4/cognitive_architecture/shared/chunk_strategy.py Normal file

@@ -0,0 +1,7 @@
from enum import Enum

class ChunkStrategy(Enum):
    EXACT = 'exact'
    PARAGRAPH = 'paragraph'
    SENTENCE = 'sentence'
    VANILLA = 'vanilla'
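The chunk_data signature modified further below takes a chunk_strategy argument; a hypothetical sketch of how this enum might drive dispatch (the real branching lives in the chunkers module, not here):

```python
# Hypothetical dispatch on ChunkStrategy, for illustration only.
def chunk_data_sketch(chunk_strategy, source_data, chunk_size, chunk_overlap):
    if chunk_strategy == ChunkStrategy.PARAGRAPH:
        return source_data.split("\n\n")
    if chunk_strategy == ChunkStrategy.SENTENCE:
        return source_data.split(". ")
    # EXACT / VANILLA: fixed-size windows with overlap
    step = max(1, chunk_size - chunk_overlap)
    return [source_data[i:i + chunk_size] for i in range(0, len(source_data), step)]
```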
79 level_4/cognitive_architecture/utils.py Normal file

@@ -0,0 +1,79 @@
import uuid

from graphviz import Digraph

# from graph_database.graph import KnowledgeGraph


class Node:
    def __init__(self, id, description, color):
        self.id = id
        self.description = description
        self.color = color


class Edge:
    def __init__(self, source, target, label, color):
        self.source = source
        self.target = target
        self.label = label
        self.color = color


# def visualize_knowledge_graph(kg: KnowledgeGraph):
#     dot = Digraph(comment="Knowledge Graph")
#
#     # Add nodes
#     for node in kg.nodes:
#         dot.node(str(node.id), node.description, color=node.color)
#
#     # Add edges
#     for edge in kg.edges:
#         dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)
#
#     # Render the graph
#     dot.render("knowledge_graph.gv", view=True)


def format_dict(d):
    # Initialize an empty list to store formatted items
    formatted_items = []

    # Iterate through all key-value pairs
    for key, value in d.items():
        # Format key-value pairs with a colon and space, adding quotes for string values
        formatted_item = f"{key}: '{value}'" if isinstance(value, str) else f"{key}: {value}"
        formatted_items.append(formatted_item)

    # Join all formatted items with a comma and a space
    formatted_string = ", ".join(formatted_items)

    # Add curly braces to mimic a dictionary
    formatted_string = f"{{{formatted_string}}}"

    return formatted_string


def append_uuid_to_variable_names(variable_mapping):
    unique_variable_mapping = {}
    for original_name in variable_mapping.values():
        unique_name = f"{original_name}_{uuid.uuid4().hex}"
        unique_variable_mapping[original_name] = unique_name
    return unique_variable_mapping


# Update the functions to use the unique variable names
def create_node_variable_mapping(nodes):
    mapping = {}
    for node in nodes:
        variable_name = f"{node['category']}{node['id']}".lower()
        mapping[node['id']] = variable_name
    return mapping


def create_edge_variable_mapping(edges):
    mapping = {}
    for edge in edges:
        # Construct a unique identifier for the edge
        variable_name = f"edge{edge['source']}to{edge['target']}".lower()
        mapping[(edge['source'], edge['target'])] = variable_name
    return mapping
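What these helpers actually produce, worked by hand (the UUID suffix is shortened for readability):

```python
# format_dict renders a dict as Cypher-style properties:
format_dict({'description': 'John Doe', 'age': 30})
# -> "{description: 'John Doe', age: 30}"

# create_node_variable_mapping + append_uuid_to_variable_names build the
# unique Cypher variable names used by graph.py:
mapping = create_node_variable_mapping([{'id': 1, 'category': 'person'}])
# -> {1: 'person1'}
append_uuid_to_variable_names(mapping)
# -> {'person1': 'person1_<32-char uuid hex>'}
```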
@@ -4,7 +4,7 @@ from io import BytesIO
 import os, sys
 # Add the parent directory to sys.path
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB
+from ..vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB
 import sqlalchemy as sa
 logging.basicConfig(level=logging.INFO)
 import marvin
@@ -13,13 +13,13 @@ from dotenv import load_dotenv
 from langchain.document_loaders import PyPDFLoader
 from langchain.retrievers import WeaviateHybridSearchRetriever
 from weaviate.gql.get import HybridFusion
-from models.sessions import Session
-from models.testset import TestSet
-from models.testoutput import TestOutput
-from models.metadatas import MetaDatas
-from models.operation import Operation
+from ..models.sessions import Session
+from ..models.testset import TestSet
+from ..models.testoutput import TestOutput
+from ..models.metadatas import MetaDatas
+from ..models.operation import Operation
 from sqlalchemy.orm import sessionmaker
-from database.database import engine
+from ..database.database import engine
 load_dotenv()
 from typing import Optional
 import time
@@ -1,7 +1,7 @@
 from langchain.document_loaders import PyPDFLoader
 import sys, os
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from shared.chunk_strategy import ChunkStrategy
+from ..shared.chunk_strategy import ChunkStrategy
 import re
 def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_overlap=None):
490 level_4/main.py

@@ -9,23 +9,14 @@ from langchain.chains import GraphCypherQAChain
 from langchain.chat_models import ChatOpenAI
 # from marvin import ai_classifier
 # marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")
-DEFAULT_PRESET = "promethai_chat"
-preset_options = [DEFAULT_PRESET]
-import questionary
-PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
-
-
-def create_config_dir():
-    if not os.path.exists(PROMETHAI_DIR):
-        os.makedirs(PROMETHAI_DIR, exist_ok=True)
-
-    folders = ["personas", "humans", "archival", "agents"]
-    for folder in folders:
-        if not os.path.exists(os.path.join(PROMETHAI_DIR, folder)):
-            os.makedirs(os.path.join(PROMETHAI_DIR, folder))
-
+from cognitive_architecture.models.sessions import Session
+from cognitive_architecture.models.testset import TestSet
+from cognitive_architecture.models.testoutput import TestOutput
+from cognitive_architecture.models.metadatas import MetaDatas
+from cognitive_architecture.models.operation import Operation
+from cognitive_architecture.models.docs import DocsModel
+from cognitive_architecture.models.memory import MemoryModel

 from pathlib import Path
@@ -40,14 +31,8 @@ import uuid

 from graphviz import Digraph

-load_dotenv()
-
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
-
-txt_path = "dune.txt"
-
+from cognitive_architecture.database.database_crud import session_scope
+from cognitive_architecture.database.database import AsyncSessionLocal

 import openai
 import instructor
@ -57,333 +42,188 @@ import instructor
|
|||
instructor.patch()
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List
|
||||
DEFAULT_PRESET = "promethai_chat"
|
||||
preset_options = [DEFAULT_PRESET]
|
||||
import questionary
|
||||
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
|
||||
load_dotenv()
|
||||
|
||||
class Node(BaseModel):
|
||||
id: int
|
||||
description: str
|
||||
category: str
|
||||
color: str ="blue"
|
||||
memory_type: str
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
||||
from cognitive_architecture.config import Config
|
||||
|
||||
config = Config()
|
||||
config.load()
|
||||
|
||||
print(config.model)
|
||||
print(config.openai_key)
|
||||
|
||||
|
||||
|
||||
class Edge(BaseModel):
|
||||
source: int
|
||||
target: int
|
||||
description: str
|
||||
color: str= "blue"
|
||||
|
||||
|
||||
class KnowledgeGraph(BaseModel):
|
||||
nodes: List[Node] = Field(..., default_factory=list)
|
||||
edges: List[Edge] = Field(..., default_factory=list)
|
||||
|
||||
|
||||
#
|
||||
|
||||
def generate_graph(input) -> KnowledgeGraph:
|
||||
return openai.ChatCompletion.create(
|
||||
model="gpt-4-1106-preview",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"""Use the given format to extract information from the following input: {input}. """,
|
||||
|
||||
},
|
||||
{ "role":"system", "content": """You are a top-tier algorithm
|
||||
designed for extracting information in structured formats to build a knowledge graph.
|
||||
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
|
||||
- The aim is to achieve simplicity and clarity in the
|
||||
knowledge graph, making it accessible for a vast audience.
|
||||
## 2. Labeling Nodes
|
||||
- **Consistency**: Ensure you use basic or elementary types for node labels.
|
||||
- For example, when you identify an entity representing a person,
|
||||
always label it as **"person"**.
|
||||
Avoid using more specific terms like "mathematician" or "scientist".
|
||||
- Include event, entity, time, or action nodes to the category.
|
||||
- Classify the memory type as episodic or semantic.
|
||||
- **Node IDs**: Never utilize integers as node IDs.
|
||||
Node IDs should be names or human-readable identifiers found in the text.
|
||||
## 3. Handling Numerical Data and Dates
|
||||
- Numerical data, like age or other related information,
|
||||
should be incorporated as attributes or properties of the respective nodes.
|
||||
- **No Separate Nodes for Dates/Numbers**:
|
||||
Do not create separate nodes for dates or numerical values.
|
||||
Always attach them as attributes or properties of nodes.
|
||||
- **Property Format**: Properties must be in a key-value format.
|
||||
- **Quotation Marks**: Never use escaped single or double quotes within property values.
|
||||
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
|
||||
## 4. Coreference Resolution
|
||||
- **Maintain Entity Consistency**:
|
||||
When extracting entities, it's vital to ensure consistency.
|
||||
If an entity, such as "John Doe", is mentioned multiple times
|
||||
in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
|
||||
always use the most complete identifier for that entity throughout the knowledge graph.
|
||||
In this example, use "John Doe" as the entity ID.
|
||||
Remember, the knowledge graph should be coherent and easily understandable,
|
||||
so maintaining consistency in entity references is crucial.
|
||||
## 5. Strict Compliance
|
||||
Adhere to the rules strictly. Non-compliance will result in termination."""}
|
||||
],
|
||||
response_model=KnowledgeGraph,
|
||||
)
|
||||
import logging
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def execute_cypher_query(query: str):
|
||||
graph_ = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="pleaseletmein")
|
||||
graph_.query(query)
|
||||
# This is a placeholder for the logic that will execute the Cypher query
|
||||
# You would replace this with the actual logic to run the query in your Neo4j database
|
||||
print(query)
|
||||
|
||||
#Execute Cypher queries to create the user and memory components if they don't exist
|
||||
#
|
||||
# graph.query(
|
||||
# f"""
|
||||
# // Ensure the User node exists
|
||||
# MERGE (user:User {{ userId: {user} }})
|
||||
#
|
||||
# // Ensure the SemanticMemory node exists
|
||||
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
|
||||
# MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
|
||||
#
|
||||
# // Ensure the EpisodicMemory node exists
|
||||
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
|
||||
# MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
|
||||
#
|
||||
# // Ensure the Buffer node exists
|
||||
# MERGE (buffer:Buffer {{ userId: {user} }})
|
||||
# MERGE (user)-[:HAS_BUFFER]->(buffer)
|
||||
# """
|
||||
# )
|
||||
#
|
||||
# # Execute Cypher queries to create the cognitive components in the graph
|
||||
# graph.query(
|
||||
# f"""
|
||||
# // Parsing the query into components and linking them to the user and memory components
|
||||
# MERGE (user:User {{ userId: {user} }})
|
||||
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
|
||||
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
|
||||
# MERGE (buffer:Buffer {{ userId: {user} }})
|
||||
#
|
||||
# CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
|
||||
# CREATE (action2:Event {{ description: 'get information', source: 'book' }})
|
||||
# CREATE (time:TimeContext {{ description: 'in the afternoon' }})
|
||||
#
|
||||
# WITH user, semantic, episodic, buffer, action1, action2, time
|
||||
# CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
|
||||
# CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
|
||||
# CREATE (episodic)-[:HAS_EVENT]->(action1)
|
||||
# CREATE (episodic)-[:HAS_EVENT]->(action2)
|
||||
# CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
|
||||
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
|
||||
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
|
||||
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
|
||||
# """
|
||||
# )
|
||||
|
||||
|
||||
|
||||
|
||||
import asyncio
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
||||
class Node:
|
||||
def __init__(self, id, description, color):
|
||||
self.id = id
|
||||
self.description = description
|
||||
self.color = color
|
||||
async def get_vectordb_namespace(session: AsyncSession, user_id: str):
|
||||
try:
|
||||
result = await session.execute(
|
||||
select(MemoryModel.id).where(MemoryModel.user_id == user_id).order_by(MemoryModel.created_at.desc()).limit(1)
|
||||
)
|
||||
namespace = result.scalar_one_or_none()
|
||||
return namespace
|
||||
except Exception as e:
|
||||
logging.error(f"An error occurred while retrieving the Vectordb_namespace: {str(e)}")
|
||||
return None
|
||||
|
||||
class Edge:
|
||||
def __init__(self, source, target, label, color):
|
||||
self.source = source
|
||||
self.target = target
|
||||
self.label = label
|
||||
self.color = color
|
||||
def visualize_knowledge_graph(kg: KnowledgeGraph):
|
||||
dot = Digraph(comment="Knowledge Graph")
|
||||
|
||||
# Add nodes
|
||||
for node in kg.nodes:
|
||||
dot.node(str(node.id), node.description, color=node.color)
|
||||
|
||||
# Add edges
|
||||
for edge in kg.edges:
|
||||
dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)
|
||||
|
||||
# Render the graph
|
||||
dot.render("knowledge_graph.gv", view=True)
|
||||
# async def retrieve_job_by_id(session, user_id, job_id):
|
||||
# try:
|
||||
# result = await session.execute(
|
||||
# session.query(Session.id)
|
||||
# .filter_by(user_id=user_id, id=job_id)
|
||||
# .order_by(Session.created_at)
|
||||
# )
|
||||
# return result.scalar_one_or_none()
|
||||
# except Exception as e:
|
||||
# logging.error(f"An error occurred while retrieving the job: {str(e)}")
|
||||
# return None
|
||||
|
||||
|
||||
def create_base_queries_from_user( user_id: str):
|
||||
# Create the user and memory components if they don't exist
|
||||
user_memory_cypher = f"""
|
||||
MERGE (user:User {{userId: '{user_id}'}})
|
||||
MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
|
||||
MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
|
||||
MERGE (buffer:Buffer {{userId: '{user_id}'}})
|
||||
MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
|
||||
MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
|
||||
MERGE (user)-[:HAS_BUFFER]->(buffer)
|
||||
|
||||
|
||||
async def update_document_vectordb_namespace(postgres_session: AsyncSession, user_id: str, namespace: str = None):
|
||||
"""
|
||||
Update the Document node with the Vectordb_namespace for the given user. If the namespace is not provided,
|
||||
it will be retrieved from the PostgreSQL database.
|
||||
|
||||
return user_memory_cypher
|
||||
Args:
|
||||
postgres_session (AsyncSession): The async session for connecting to the PostgreSQL database.
|
||||
user_id (str): The user's unique identifier.
|
||||
namespace (str, optional): The Vectordb_namespace. If None, it will be retrieved from the database.
|
||||
|
||||
# Function to append a UUID4 to the variable names to ensure uniqueness
|
||||
def append_uuid_to_variable_names(variable_mapping):
|
||||
unique_variable_mapping = {}
|
||||
for original_name in variable_mapping.values():
|
||||
unique_name = f"{original_name}_{uuid.uuid4().hex}"
|
||||
unique_variable_mapping[original_name] = unique_name
|
||||
return unique_variable_mapping
|
||||
Returns:
|
||||
The result of the update operation or None if an error occurred.
|
||||
"""
|
||||
vectordb_namespace = namespace
|
||||
|
||||
# Update the functions to use the unique variable names
|
||||
def create_node_variable_mapping(nodes):
|
||||
mapping = {}
|
||||
for node in nodes:
|
||||
variable_name = f"{node['category']}{node['id']}".lower()
|
||||
mapping[node['id']] = variable_name
|
||||
return mapping
|
||||
# Retrieve namespace from the database if not provided
|
||||
if vectordb_namespace is None:
|
||||
vectordb_namespace = await get_vectordb_namespace(postgres_session, user_id)
|
||||
if not vectordb_namespace:
|
||||
logging.error("Vectordb_namespace could not be retrieved.")
|
||||
return None
|
||||
|
||||
|
||||
def create_edge_variable_mapping(edges):
|
||||
mapping = {}
|
||||
for edge in edges:
|
||||
# Construct a unique identifier for the edge
|
||||
variable_name = f"edge{edge['source']}to{edge['target']}".lower()
|
||||
mapping[(edge['source'], edge['target'])] = variable_name
|
||||
return mapping
|
||||
|
||||
|
||||
# Update the function to generate Cypher CREATE statements for nodes with unique variable names
|
||||
|
||||
|
||||
def format_dict(d):
|
||||
# Initialize an empty list to store formatted items
|
||||
formatted_items = []
|
||||
|
||||
# Iterate through all key-value pairs
|
||||
for key, value in d.items():
|
||||
# Format key-value pairs with a colon and space, and adding quotes for string values
|
||||
formatted_item = f"{key}: '{value}'" if isinstance(value, str) else f"{key}: {value}"
|
||||
formatted_items.append(formatted_item)
|
||||
|
||||
# Join all formatted items with a comma and a space
|
||||
formatted_string = ", ".join(formatted_items)
|
||||
|
||||
# Add curly braces to mimic a dictionary
|
||||
formatted_string = f"{{{formatted_string}}}"
|
||||
|
||||
return formatted_string
|
||||
def generate_create_statements_for_nodes_with_uuid(nodes, unique_mapping):
|
||||
create_statements = []
|
||||
for node in nodes:
|
||||
original_variable_name = node_variable_mapping[node['id']]
|
||||
unique_variable_name = unique_mapping[original_variable_name]
|
||||
node_label = node['category'].capitalize()
|
||||
properties = {k: v for k, v in node.items() if k not in ['id', 'category']}
|
||||
try:
|
||||
properties = format_dict(properties)
|
||||
except:
|
||||
pass
|
||||
create_statements.append(f"CREATE ({unique_variable_name}:{node_label} {properties})")
|
||||
return create_statements
|
||||
|
||||
# Update the function to generate Cypher CREATE statements for edges with unique variable names
|
||||
def generate_create_statements_for_edges_with_uuid(edges, unique_mapping):
|
||||
create_statements = []
|
||||
with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
|
||||
create_statements.append(with_statement)
|
||||
|
||||
for edge in edges:
|
||||
# print("HERE IS THE EDGE", edge)
|
||||
source_variable = unique_mapping[node_variable_mapping[edge['source']]]
|
||||
target_variable = unique_mapping[node_variable_mapping[edge['target']]]
|
||||
relationship = edge['description'].replace(" ", "_").upper()
|
||||
create_statements.append(f"CREATE ({source_variable})-[:{relationship}]->({target_variable})")
|
||||
return create_statements
|
||||
|
||||
|
||||
# Generate Cypher CREATE statements for memory type relationships with unique variable names
def generate_memory_type_relationships_with_uuid_and_time_context(nodes, unique_mapping):
    create_statements = []
    with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
    create_statements.append(with_statement)

    # Loop through each node and create relationships based on memory_type
    for node in nodes:
        original_variable_name = node_variable_mapping[node['id']]
        unique_variable_name = unique_mapping[original_variable_name]
        if node['memory_type'] == 'semantic':
            create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_variable_name})")
        elif node['memory_type'] == 'episodic':
            create_statements.append(f"CREATE (episodic)-[:HAS_EVENT]->({unique_variable_name})")
        if node['category'] == 'time':
            create_statements.append(f"CREATE (buffer)-[:HAS_TIME_CONTEXT]->({unique_variable_name})")

        # Assuming buffer holds all actions and times
        # if node['category'] in ['action', 'time']:
        create_statements.append(f"CREATE (buffer)-[:CURRENTLY_HOLDING]->({unique_variable_name})")

    return create_statements
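# Illustrative sketch (not from the source) of the statements this emits,
# assuming semantic/episodic/buffer nodes exist from create_base_queries_from_user:
#   CREATE (semantic)-[:HAS_KNOWLEDGE]->(milk3_...)        # semantic facts
#   CREATE (episodic)-[:HAS_EVENT]->(walk1_...)            # episodic events
#   CREATE (buffer)-[:HAS_TIME_CONTEXT]->(yesterday2_...)  # time nodes
#   CREATE (buffer)-[:CURRENTLY_HOLDING]->(walk1_...)      # buffer holds every node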
# Main execution logic
if __name__ == "__main__":
    user_id = "User1"
    query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"

    # Generate the knowledge graph from the user input and visualize it
    knowledge_graph = generate_graph(query_input)
    visualize_knowledge_graph(knowledge_graph)

    # Reuse the graph generated above instead of prompting the model a second time
    graph: KnowledgeGraph = knowledge_graph
    graph_dic = graph.dict()

    node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
    edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
    # Create unique variable names for each node and edge
    unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
    unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)  # currently unused
    create_nodes_statements = generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping)
    create_edges_statements = generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping)

    memory_type_statements_with_uuid_and_time_context = generate_memory_type_relationships_with_uuid_and_time_context(
        graph_dic['nodes'], unique_node_variable_mapping)

    # Combine all statements into a single Cypher script
    cypher_statements = [create_base_queries_from_user(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
    cypher_statements_joined = "\n".join(cypher_statements)

    execute_cypher_query(cypher_statements_joined)
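# Illustrative sketch (not from the source) of the joined script sent to Neo4j;
# the exact opening statements depend on create_base_queries_from_user, which is
# defined elsewhere in the module:
#   <base user/semantic/episodic/buffer setup>
#   CREATE (person1_ab12cd34:Person {...})
#   ...
#   WITH person1_ab12cd34, ..., user, semantic, episodic, buffer
#   CREATE (person1_ab12cd34)-[:WALKED_IN]->(forest2_ef56gh78)
#   WITH person1_ab12cd34, ..., user, semantic, episodic, buffer
#   CREATE (episodic)-[:HAS_EVENT]->(walk1_...)
# Executing it as one script keeps node creation and linking in a single round trip.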
# Translate the KnowledgeGraph into Cypher queries

# TODO:
# - Make document summary in Semantic Memory
# - Document summary links to a Namespace in Vector Store
# - Categorize document types in Semantic Memory
# - Make a spine classifier that retrieves the relevant document namespaces from Vector Store
# - Connect document summary to chunks in Weaviate vector store
# Example: create a Document summary node in semantic memory and link it to the user.
# bartleby_summary = {
#     "document_category": "Classic Literature",
#     "title": "Bartleby, the Scrivener",
#     "summary": (
#         "'Bartleby, the Scrivener: A Story of Wall Street' is a short story by Herman Melville "
#         "that tells the tale of Bartleby, a scrivener, or copyist, who works for a Manhattan "
#         "lawyer. Initially, Bartleby is a competent and industrious worker. However, one day, "
#         "when asked to proofread a document, he responds with what becomes his constant refrain "
#         "to any request: 'I would prefer not to.' As the story progresses, Bartleby becomes "
#         "increasingly passive, refusing not just work but also food and eventually life itself, "
#         "as he spirals into a state of passive resistance. The lawyer, the narrator of the story, "
#         "is both fascinated and frustrated by Bartleby's behavior. Despite attempts to understand "
#         "and help him, Bartleby remains an enigmatic figure, his motives and thoughts unexplained. "
#         "He is eventually evicted from the office and later found dead in a prison yard, having "
#         "preferred not to live. The story is a meditation on the themes of isolation, societal "
#         "obligation, and the inexplicable nature of human behavior."
#     )
# }
# rs = create_document_node_cypher(bartleby_summary, user_id)
#
# parameters = {
#     'user_id': user_id,
#     'title': bartleby_summary['title'],
#     'summary': bartleby_summary['summary'],
#     'document_category': bartleby_summary['document_category']
# }
#
# execute_cypher_query(rs, parameters)
#
# # Refresh the graph schema and print it to the console
# graph.refresh_schema()
# print(graph.schema)
async def main():
    from cognitive_architecture.graph_database.graph import Neo4jGraphDB

    # Example initialization (replace with your actual connection details)
    neo4j_graph_db = Neo4jGraphDB(url='bolt://localhost:7687', username='neo4j', password='pleaseletmein')

    # Generate the Cypher query for a specific user
    user_id = 'user123'  # Replace with the actual user ID
    cypher_query = neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id)

    # Execute the generated Cypher query
    result = neo4j_graph_db.query(cypher_query)

    # async with session_scope(AsyncSessionLocal()) as session:
    #     await update_document_vectordb_namespace(session, user_id)


# Note: this is the second __main__ guard in the module; when the file is run as
# a script, the knowledge-graph block above executes first, then this one.
if __name__ == "__main__":
    import asyncio

    asyncio.run(main())