Added dynamic graph creation

commit ceba0d31e7 (parent 0b6f9b0dca)
23 changed files with 984 additions and 445 deletions
README.md | 102

@@ -149,110 +149,8 @@ Run
### Run level 3

Make sure you have Docker, Poetry, Python 3.11, and Postgres installed.

Copy the .env.example to .env and fill in the variables.

There are two ways to run level 3:

#### Docker:

Copy the .env.template to .env and fill in the variables.
Set the environment variable in the .env file to "docker".

Launch the docker image:

```docker compose up promethai_mem```

Send the request to the API:

```
curl -X POST -H "Content-Type: application/json" -d '{
  "payload": {
    "user_id": "97980cfea0067",
    "data": [".data/3ZCCCW.pdf"],
    "test_set": "sample",
    "params": ["chunk_size"],
    "metadata": "sample",
    "retriever_type": "single_document_context"
  }
}' http://0.0.0.0:8000/rag-test/rag_test_run
```
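The same request from Python, for reference (a sketch using the requests package; endpoint and payload mirror the curl call above):

```python
import requests

payload = {
    "payload": {
        "user_id": "97980cfea0067",
        "data": [".data/3ZCCCW.pdf"],
        "test_set": "sample",
        "params": ["chunk_size"],
        "metadata": "sample",
        "retriever_type": "single_document_context",
    }
}
response = requests.post("http://0.0.0.0:8000/rag-test/rag_test_run", json=payload)
print(response.status_code, response.json())
```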
Params:

- data -> list of URLs or paths to files located in the .data folder (pdf, docx, txt, html)
- test_set -> sample, manual (list of questions and answers)
- metadata -> sample, manual (json) or version (in progress)
- params -> chunk_size, chunk_overlap, search_type (hybrid, bm25), embeddings
- retriever_type -> llm_context, single_document_context, multi_document_context, cognitive_architecture (coming soon)

Inspect the results in the DB:

```docker exec -it postgres psql -U bla```

```\c bubu```

```select * from test_outputs;```

Or set up Superset to visualize the results.
The base SQL query is in the example_data folder.

#### Poetry environment:

Copy the .env.template to .env and fill in the variables.
Set the environment variable in the .env file to "local".

Activate the poetry environment:

```poetry shell```

Launch the Postgres DB:

```docker compose up postgres```

Launch Superset:

```docker compose up superset```

Open Superset in your browser:

```http://localhost:8088```

Add the Postgres datasource to Superset with the following connection string:

```postgres://bla:bla@postgres:5432/bubu```

Initialize the DB tables:

```python scripts/create_database.py```

After that, you can run the RAG test manager from your command line:

```
python rag_test_manager.py \
  --file ".data" \
  --test_set "example_data/test_set.json" \
  --user_id "97980cfea0067" \
  --params "chunk_size" "search_type" \
  --metadata "example_data/metadata.json" \
  --retriever_type "single_document_context"
```

Examples of metadata structure and test set are in the folder "example_data".
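For orientation, a hypothetical sketch of what a manual test-set entry might look like (field names are illustrative; example_data/test_set.json is authoritative):

```python
# Illustrative only -- check example_data/test_set.json for the real schema.
test_set = [
    {
        "question": "What is the main topic of the document?",
        "answer": "The expected answer used for scoring.",
    },
]
```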
@@ -7,4 +7,8 @@ POSTGRES_PASSWORD = bla
 POSTGRES_DB = bubu
 POSTGRES_HOST = localhost
 POSTGRES_HOST_DOCKER = postgres
 SEGMENT_KEY = Etl4WJwzOkeDPAjaOXOMgyU16hO7mV7B
+COG_ARCH_DIR = cognitive_architecture
+GRAPH_DB_URL =
+GRAPH_DB_PW =
+GRAPH_DB_USER =
level_4/cognitive_architecture/classifiers/__init__.py | 0 (new file)
level_4/cognitive_architecture/classifiers/classifier.py | 15 (new file)
@@ -0,0 +1,15 @@
# TODO: add all classifiers here


# classify retrievals according to type of retrieval
def classify_retrieval():
    pass


# classify documents according to type of document
def classify_call():
    pass
level_4/cognitive_architecture/config.py | 84 (new file)

@@ -0,0 +1,84 @@
import os
import json
import configparser
import uuid
from typing import Optional, List, Dict, Any
from dataclasses import dataclass, field
from pathlib import Path
from dotenv import load_dotenv


base_dir = Path(__file__).resolve().parent.parent
# Load the .env file from the base directory
dotenv_path = base_dir / '.env'
load_dotenv(dotenv_path=dotenv_path)


@dataclass
class Config:
    # Paths and Directories
    memgpt_dir: str = field(default_factory=lambda: os.getenv('COG_ARCH_DIR', 'cognitive_architecture'))
    config_path: str = field(default_factory=lambda: os.path.join(os.getenv('COG_ARCH_DIR', 'cognitive_architecture'), 'config'))

    # Model parameters
    model: str = 'gpt-4-1106-preview'
    model_endpoint: str = 'openai'
    openai_key: Optional[str] = os.getenv('OPENAI_API_KEY')

    # Embedding parameters
    embedding_model: str = 'openai'
    embedding_dim: int = 1536
    embedding_chunk_size: int = 300

    # Database parameters
    graph_database_url: str = os.getenv('GRAPH_DB_URL')
    graph_database_username: str = os.getenv('GRAPH_DB_USER')
    graph_database_password: str = os.getenv('GRAPH_DB_PW')

    # Client ID
    anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)

    def load(self):
        """Loads the configuration from a file or environment variables."""
        config = configparser.ConfigParser()
        config.read(self.config_path)

        # Override with environment variables if they exist
        for attr in self.__annotations__:
            env_value = os.getenv(attr.upper())
            if env_value is not None:
                setattr(self, attr, env_value)

        # Load from config file
        if config.sections():
            for section in config.sections():
                for key, value in config.items(section):
                    if hasattr(self, key):
                        setattr(self, key, value)

    def save(self):
        """Saves the current configuration to a file."""
        config = configparser.ConfigParser()

        # Save the current settings to the config file
        for attr, value in self.__dict__.items():
            section, option = attr.split('_', 1)
            if not config.has_section(section):
                config.add_section(section)
            config.set(section, option, str(value))

        with open(self.config_path, 'w') as configfile:
            config.write(configfile)

    def to_dict(self) -> Dict[str, Any]:
        """Returns a dictionary representation of the configuration."""
        return {attr: getattr(self, attr) for attr in self.__annotations__}

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":
        """Creates a Config instance from a dictionary."""
        config = cls()
        for attr, value in config_dict.items():
            if hasattr(config, attr):
                setattr(config, attr, value)
        return config
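A quick usage sketch for the new Config class (it mirrors how graph.py consumes it; the import path assumes the level_4 package root):

```python
from cognitive_architecture.config import Config

config = Config()    # dataclass defaults plus values pulled from .env
config.load()        # overlay config-file sections and UPPERCASE env vars
print(config.model)
print(config.to_dict())
```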
@@ -23,4 +23,18 @@ async def session_scope(session):
 async def add_entity(session, entity):
     async with session_scope(session) as s:  # Use your async session_scope
         s.add(entity)  # No need to commit; session_scope takes care of it
         return "Successfully added entity"
+
+
+async def update_entity(session, model, entity_id, new_value):
+    async with session_scope(session) as s:
+        # Retrieve the entity from the database
+        entity = await s.get(model, entity_id)
+
+        if entity:
+            # Update the relevant column and 'updated_at' will be automatically updated
+            entity.operation_status = new_value
+            return "Successfully updated entity"
+        else:
+            return "Entity not found"
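A sketch of calling the new helper (illustrative; update_entity writes to operation_status, so an Operation row is the natural target):

```python
# Hypothetical call site: assumes an AsyncSessionLocal factory and an existing Operation id.
async with AsyncSessionLocal() as session:
    status = await update_entity(session, Operation, operation_id, "completed")
    print(status)  # "Successfully updated entity" or "Entity not found"
```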
level_4/cognitive_architecture/graph_database/graph.py | 588 (new file)

@@ -0,0 +1,588 @@
from pydantic import BaseModel, Field
from enum import Enum

import typer
import os
import uuid
# import marvin
# from pydantic_settings import BaseSettings
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
# from marvin import ai_classifier
# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")

print(os.getcwd())

from ..models.sessions import Session
from ..models.testset import TestSet
from ..models.testoutput import TestOutput
from ..models.metadatas import MetaDatas
from ..models.operation import Operation
from ..models.docs import DocsModel
from ..models.memory import MemoryModel

from pathlib import Path
import networkx as nx

from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.graphs import Neo4jGraph
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import Neo4jVector
from dotenv import load_dotenv

from graphviz import Digraph

from ..database.database_crud import session_scope
from ..database.database import AsyncSessionLocal

import openai
import instructor

from abc import ABC, abstractmethod
from typing import List

# Adds response_model to ChatCompletion
# Allows the return of Pydantic model rather than raw JSON
instructor.patch()

from ..utils import format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, create_node_variable_mapping

DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]
import questionary
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
from ..config import Config

config = Config()
config.load()

print(config.model)
print(config.openai_key)

import logging

# Execute Cypher queries to create the user and memory components if they don't exist
#
# graph.query(
#     f"""
#     // Ensure the User node exists
#     MERGE (user:User {{ userId: {user} }})
#
#     // Ensure the SemanticMemory node exists
#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
#     MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
#
#     // Ensure the EpisodicMemory node exists
#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
#     MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
#
#     // Ensure the Buffer node exists
#     MERGE (buffer:Buffer {{ userId: {user} }})
#     MERGE (user)-[:HAS_BUFFER]->(buffer)
#     """
# )
#
# # Execute Cypher queries to create the cognitive components in the graph
# graph.query(
#     f"""
#     // Parsing the query into components and linking them to the user and memory components
#     MERGE (user:User {{ userId: {user} }})
#     MERGE (semantic:SemanticMemory {{ userId: {user} }})
#     MERGE (episodic:EpisodicMemory {{ userId: {user} }})
#     MERGE (buffer:Buffer {{ userId: {user} }})
#
#     CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
#     CREATE (action2:Event {{ description: 'get information', source: 'book' }})
#     CREATE (time:TimeContext {{ description: 'in the afternoon' }})
#
#     WITH user, semantic, episodic, buffer, action1, action2, time
#     CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
#     CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
#     CREATE (episodic)-[:HAS_EVENT]->(action1)
#     CREATE (episodic)-[:HAS_EVENT]->(action2)
#     CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
#     CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
#     """
# )


class Node(BaseModel):
    id: int
    description: str
    category: str
    color: str = "blue"
    memory_type: str


class Edge(BaseModel):
    source: int
    target: int
    description: str
    color: str = "blue"


class KnowledgeGraph(BaseModel):
    nodes: List[Node] = Field(default_factory=list)
    edges: List[Edge] = Field(default_factory=list)


def generate_graph(input) -> KnowledgeGraph:
    return openai.ChatCompletion.create(
        model="gpt-4-1106-preview",
        messages=[
            {
                "role": "user",
                "content": f"""Use the given format to extract information from the following input: {input}. """,
            },
            {"role": "system", "content": """You are a top-tier algorithm
designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person,
always label it as **"person"**.
Avoid using more specific terms like "mathematician" or "scientist".
- Include event, entity, time, or action nodes to the category.
- Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs.
Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information,
should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**:
Do not create separate nodes for dates or numerical values.
Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**:
When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times
in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph.
In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable,
so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination."""}
        ],
        response_model=KnowledgeGraph,
    )
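# Usage sketch (commented out; calling it requires a valid OPENAI_API_KEY and
# network access -- instructor's patch makes the call return a KnowledgeGraph):
# kg = generate_graph("I walked in the forest yesterday and need to buy some milk")
# for node in kg.nodes:
#     print(node.id, node.category, node.memory_type)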

class AbstractGraphDB(ABC):

    @abstractmethod
    def query(self, query: str, params=None):
        pass

    # @abstractmethod
    # def create_nodes(self, nodes: List[dict]):
    #     pass
    #
    # @abstractmethod
    # def create_edges(self, edges: List[dict]):
    #     pass
    #
    # @abstractmethod
    # def create_memory_type_relationships(self, nodes: List[dict], memory_type: str):
    #     pass


class Neo4jGraphDB(AbstractGraphDB):
    def __init__(self, url, username, password):
        # Initialize the Neo4j connection here
        self.graph = Neo4jGraph(url=url, username=username, password=password)
        self.openai_key = config.openai_key

    def query(self, query, params=None):
        return self.graph.query(query, params)

    def create_base_cognitive_architecture(self, user_id: str):
        # Create the user and memory components if they don't exist
        user_memory_cypher = f"""
        MERGE (user:User {{userId: '{user_id}'}})
        MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
        MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
        MERGE (buffer:Buffer {{userId: '{user_id}'}})
        MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
        MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
        MERGE (user)-[:HAS_BUFFER]->(buffer)
        """

        return user_memory_cypher

    def user_query_to_edges_and_nodes(self, input: str) -> KnowledgeGraph:
        return openai.ChatCompletion.create(
            model=config.model,
            messages=[
                {
                    "role": "user",
                    "content": f"""Use the given format to extract information from the following input: {input}. """,
                },
                {"role": "system", "content": """You are a top-tier algorithm
designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the
knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person,
always label it as **"person"**.
Avoid using more specific terms like "mathematician" or "scientist".
- Include event, entity, time, or action nodes to the category.
- Classify the memory type as episodic or semantic.
- **Node IDs**: Never utilize integers as node IDs.
Node IDs should be names or human-readable identifiers found in the text.
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information,
should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**:
Do not create separate nodes for dates or numerical values.
Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**:
When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times
in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph.
In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable,
so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination."""}
            ],
            response_model=KnowledgeGraph,
        )

    def generate_create_statements_for_nodes_with_uuid(self, nodes, unique_mapping, base_node_mapping):
        create_statements = []
        for node in nodes:
            original_variable_name = base_node_mapping[node['id']]
            unique_variable_name = unique_mapping[original_variable_name]
            node_label = node['category'].capitalize()
            properties = {k: v for k, v in node.items() if k not in ['id', 'category']}
            try:
                properties = format_dict(properties)
            except:
                pass
            create_statements.append(f"CREATE ({unique_variable_name}:{node_label} {properties})")
        return create_statements

    # Generate Cypher CREATE statements for edges with unique variable names
    def generate_create_statements_for_edges_with_uuid(self, edges, unique_mapping, base_node_mapping):
        create_statements = []
        with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
        create_statements.append(with_statement)

        for edge in edges:
            # print("HERE IS THE EDGE", edge)
            source_variable = unique_mapping[base_node_mapping[edge['source']]]
            target_variable = unique_mapping[base_node_mapping[edge['target']]]
            relationship = edge['description'].replace(" ", "_").upper()
            create_statements.append(f"CREATE ({source_variable})-[:{relationship}]->({target_variable})")
        return create_statements

    def generate_memory_type_relationships_with_uuid_and_time_context(self, nodes, unique_mapping, base_node_mapping):
        create_statements = []
        with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
        create_statements.append(with_statement)

        # Loop through each node and create relationships based on memory_type
        for node in nodes:
            original_variable_name = base_node_mapping[node['id']]
            unique_variable_name = unique_mapping[original_variable_name]
            if node['memory_type'] == 'semantic':
                create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_variable_name})")
            elif node['memory_type'] == 'episodic':
                create_statements.append(f"CREATE (episodic)-[:HAS_EVENT]->({unique_variable_name})")
                if node['category'] == 'time':
                    create_statements.append(f"CREATE (buffer)-[:HAS_TIME_CONTEXT]->({unique_variable_name})")

            # Assuming buffer holds all actions and times
            # if node['category'] in ['action', 'time']:
            create_statements.append(f"CREATE (buffer)-[:CURRENTLY_HOLDING]->({unique_variable_name})")

        return create_statements

    def generate_cypher_query_for_user_prompt_decomposition(self, user_id):

        graph: KnowledgeGraph = generate_graph("I walked in the forest yesterday and added to my list I need to buy some milk in the store")
        graph_dic = graph.dict()

        node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
        edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
        # Create unique variable names for each node
        unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
        unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)
        create_nodes_statements = self.generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping, node_variable_mapping)
        create_edges_statements = self.generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping, node_variable_mapping)

        memory_type_statements_with_uuid_and_time_context = self.generate_memory_type_relationships_with_uuid_and_time_context(
            graph_dic['nodes'], unique_node_variable_mapping, node_variable_mapping)

        # Combine all statements
        cypher_statements = [self.create_base_cognitive_architecture(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
        cypher_statements_joined = "\n".join(cypher_statements)
        return cypher_statements_joined
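    # Usage sketch (commented out; assumes a reachable Neo4j instance whose
    # credentials are supplied via the GRAPH_DB_* settings):
    # db = Neo4jGraphDB(url=config.graph_database_url,
    #                   username=config.graph_database_username,
    #                   password=config.graph_database_password)
    # cypher = db.generate_cypher_query_for_user_prompt_decomposition("97980cfea0067")
    # db.query(cypher)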
    def update_user_query_for_user_prompt_decomposition(self, user_id, user_query):
        pass

    def delete_all_user_memories(self, user_id):
        try:
            # Check if the user exists
            user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
            if not user_exists:
                return f"No user found with ID: {user_id}"

            # Delete all memory nodes and relationships for the given user
            delete_query = f"""
            MATCH (user:User {{userId: '{user_id}'}})-[r]-()
            DELETE r
            WITH user
            MATCH (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
            MATCH (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
            MATCH (user)-[:HAS_BUFFER]->(buffer)
            DETACH DELETE semantic, episodic, buffer
            """
            self.graph.query(delete_query)
            return f"All memories deleted for user ID: {user_id}"
        except Exception as e:
            return f"An error occurred: {str(e)}"

    def delete_specific_memory_type(self, user_id, memory_type):
        try:
            # Check if the user exists
            user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
            if not user_exists:
                return f"No user found with ID: {user_id}"

            # Validate memory type
            if memory_type not in ['SemanticMemory', 'EpisodicMemory', 'Buffer']:
                return "Invalid memory type. Choose from 'SemanticMemory', 'EpisodicMemory', or 'Buffer'."

            # Delete specific memory type nodes and relationships for the given user
            delete_query = f"""
            MATCH (user:User {{userId: '{user_id}'}})-[:HAS_{memory_type.upper()}]->(memory)
            DETACH DELETE memory
            """
            self.graph.query(delete_query)
            return f"{memory_type} deleted for user ID: {user_id}"
        except Exception as e:
            return f"An error occurred: {str(e)}"

    def retrieve_semantic_memory(self, user_id: str):
        query = """
        MATCH (user:User {userId: $user_id})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)
        MATCH (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
        RETURN knowledge
        """
        return self.query(query, params={"user_id": user_id})

    def retrieve_episodic_memory(self, user_id: str):
        query = """
        MATCH (user:User {userId: $user_id})-[:HAS_EPISODIC_MEMORY]->(episodic:EpisodicMemory)
        MATCH (episodic)-[:HAS_EVENT]->(event)
        RETURN event
        """
        return self.query(query, params={"user_id": user_id})

    def retrieve_buffer_memory(self, user_id: str):
        query = """
        MATCH (user:User {userId: $user_id})-[:HAS_BUFFER]->(buffer:Buffer)
        MATCH (buffer)-[:CURRENTLY_HOLDING]->(item)
        RETURN item
        """
        return self.query(query, params={"user_id": user_id})
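    # Retrieval sketch (commented out): each retrieve_* call runs a parameterized
    # Cypher MATCH and returns langchain's raw query result (a list of records).
    # knowledge = db.retrieve_semantic_memory("97980cfea0067")
    # items = db.retrieve_buffer_memory("97980cfea0067")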
    def generate_graph_semantic_memory_document_summary(self, document_summary: str, unique_graphdb_mapping_values: dict, document_namespace: str):
        """ This function takes a document and generates a document summary in Semantic Memory"""
        create_statements = []
        with_statement = f"WITH {', '.join(unique_graphdb_mapping_values.values())}, user, semantic, episodic, buffer"
        create_statements.append(with_statement)

        # Loop through each node and create relationships based on memory_type
        create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_graphdb_mapping_values})")

        return create_statements

    def generate_document_summary(self, document_summary: str, unique_graphdb_mapping_values: dict, document_namespace: str):
        """ This function takes a document and generates a document summary in Semantic Memory"""

        # fetch namespace from postgres db
        # fetch 1st and last page from vector store
        # summarize the text, add document type
        # write to postgres
        create_statements = []
        with_statement = f"WITH {', '.join(unique_graphdb_mapping_values.values())}, user, semantic, episodic, buffer"
        create_statements.append(with_statement)

        # Loop through each node and create relationships based on memory_type
        create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_graphdb_mapping_values})")

        return create_statements

    def create_document_node_cypher(self, document_summary: dict, user_id: str) -> str:
        """
        Generate a Cypher query to create a Document node linked to a SemanticMemory node for a user.

        Parameters:
        - document_summary (dict): A dictionary containing the document's category, title, and summary.
        - user_id (str): The unique identifier for the user.

        Returns:
        - str: A Cypher query string with parameters.

        Raises:
        - ValueError: If any required data is missing or invalid.
        """

        # Validate the input parameters
        if not isinstance(document_summary, dict):
            raise ValueError("The document_summary must be a dictionary.")
        if not all(key in document_summary for key in ['document_category', 'title', 'summary']):
            raise ValueError("The document_summary dictionary is missing required keys.")
        if not isinstance(user_id, str) or not user_id:
            raise ValueError("The user_id must be a non-empty string.")

        # Escape single quotes in the document summary data (if not using parameters)
        # title = document_summary['title'].replace("'", "\\'")
        # summary = document_summary['summary'].replace("'", "\\'")
        # document_category = document_summary['document_category'].replace("'", "\\'")

        # Generate the Cypher query using parameters
        cypher_query = f'''
        // Ensure the User node exists
        MERGE (user:User {{ userId: $user_id }})

        // Ensure the SemanticMemory node exists and is connected to the User
        MERGE (semantic:SemanticMemory {{ userId: $user_id }})
        MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)

        // Create the Document node with its properties
        CREATE (document:Document {{
            title: $title,
            summary: $summary,
            documentCategory: $document_category
        }})

        // Link the Document node to the SemanticMemory node
        CREATE (semantic)-[:HAS_DOCUMENT]->(document)
        '''

        return cypher_query
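    # Usage sketch (commented out): the generated query uses $-parameters, so the
    # values must be passed alongside it when executing.
    # summary = {"document_category": "book", "title": "Dune", "summary": "A desert planet saga."}
    # q = db.create_document_node_cypher(summary, "97980cfea0067")
    # db.query(q, params={"user_id": "97980cfea0067",
    #                     "title": summary["title"],
    #                     "summary": summary["summary"],
    #                     "document_category": summary["document_category"]})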
    def update_document_node_with_namespace(self, user_id: str, vectordb_namespace: str):
        # Generate the Cypher query
        cypher_query = f'''
        MATCH (user:User {{userId: $user_id}})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
        SET document.vectordbNamespace = $vectordb_namespace
        RETURN document
        '''

        # Parameters for the query
        parameters = {
            'user_id': user_id,
            'vectordb_namespace': vectordb_namespace
        }

        # Execute the query with the provided parameters
        result = self.query(cypher_query, parameters)

        return result


class NetworkXGraphDB(AbstractGraphDB):
    def __init__(self):
        self.graph = nx.Graph()
        # Initialize other necessary properties or configurations

    def query(self, query: str, params=None):
        # NetworkX has no Cypher engine; implement the abstract method explicitly
        # so the class can be instantiated at all.
        raise NotImplementedError("NetworkXGraphDB does not support Cypher queries.")

    def create_base_cognitive_architecture(self, user_id: str):
        # Add nodes for user and memory types if they don't exist
        self.graph.add_node(user_id, type='User')
        self.graph.add_node(f"{user_id}_semantic", type='SemanticMemory')
        self.graph.add_node(f"{user_id}_episodic", type='EpisodicMemory')
        self.graph.add_node(f"{user_id}_buffer", type='Buffer')

        # Add edges to connect user to memory types
        self.graph.add_edge(user_id, f"{user_id}_semantic", relation='HAS_SEMANTIC_MEMORY')
        self.graph.add_edge(user_id, f"{user_id}_episodic", relation='HAS_EPISODIC_MEMORY')
        self.graph.add_edge(user_id, f"{user_id}_buffer", relation='HAS_BUFFER')

    def delete_all_user_memories(self, user_id: str):
        # Remove nodes and edges related to the user's memories
        for memory_type in ['semantic', 'episodic', 'buffer']:
            memory_node = f"{user_id}_{memory_type}"
            self.graph.remove_node(memory_node)

    def delete_specific_memory_type(self, user_id: str, memory_type: str):
        # Remove a specific type of memory node and its related edges
        memory_node = f"{user_id}_{memory_type.lower()}"
        if memory_node in self.graph:
            self.graph.remove_node(memory_node)

    # Methods for retrieving semantic, episodic, and buffer memories
    def retrieve_semantic_memory(self, user_id: str):
        return [n for n in self.graph.neighbors(f"{user_id}_semantic")]

    def retrieve_episodic_memory(self, user_id: str):
        return [n for n in self.graph.neighbors(f"{user_id}_episodic")]

    def retrieve_buffer_memory(self, user_id: str):
        return [n for n in self.graph.neighbors(f"{user_id}_buffer")]


class GraphDBFactory:
    def create_graph_db(self, db_type, **kwargs):
        if db_type == 'neo4j':
            return Neo4jGraphDB(**kwargs)
        elif db_type == 'networkx':
            return NetworkXGraphDB(**kwargs)
        else:
            raise ValueError(f"Unsupported database type: {db_type}")
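A small usage sketch for the factory (a sketch, assuming the connection settings exposed by Config; the networkx backend takes no arguments):

```python
factory = GraphDBFactory()

# Neo4j-backed store; credentials come from the GRAPH_DB_* variables via Config.
neo4j_db = factory.create_graph_db(
    'neo4j',
    url=config.graph_database_url,
    username=config.graph_database_username,
    password=config.graph_database_password,
)

# In-memory NetworkX store, handy for local experiments without a database.
nx_db = factory.create_graph_db('networkx')
```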
@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base
 class DocsModel(Base):
     __tablename__ = 'docs'

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base

 class MemoryModel(Base):
     __tablename__ = 'memories'

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class MetaDatas(Base):

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class Operation(Base):

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class Session(Base):

@@ -14,7 +14,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class TestOutput(Base):

@@ -5,7 +5,7 @@ from sqlalchemy.orm import relationship
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class TestSet(Base):

@@ -6,7 +6,7 @@ from sqlalchemy.ext.declarative import declarative_base
 import os
 import sys
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from database.database import Base
+from ..database.database import Base


 class User(Base):
level_4/cognitive_architecture/presets.py | 10 (new file)

@@ -0,0 +1,10 @@

DEFAULT_PRESET = "cognitive_architecture_chat"
preset_options = [DEFAULT_PRESET]


def use_preset():
    """Placeholder for different preset options"""
    pass
level_4/cognitive_architecture/shared/__init__.py | 0 (new file)
level_4/cognitive_architecture/shared/chunk_strategy.py | 7 (new file)

@@ -0,0 +1,7 @@
from enum import Enum

class ChunkStrategy(Enum):
    EXACT = 'exact'
    PARAGRAPH = 'paragraph'
    SENTENCE = 'sentence'
    VANILLA = 'vanilla'
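A quick sketch of how the enum round-trips from the plain strings used elsewhere in the API (import path assumed from the file layout above):

```python
from cognitive_architecture.shared.chunk_strategy import ChunkStrategy

assert ChunkStrategy('paragraph') is ChunkStrategy.PARAGRAPH
print(ChunkStrategy.PARAGRAPH.value)  # 'paragraph'
```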
level_4/cognitive_architecture/utils.py | 79 (new file)

@@ -0,0 +1,79 @@
import uuid

from graphviz import Digraph

# from graph_database.graph import KnowledgeGraph


class Node:
    def __init__(self, id, description, color):
        self.id = id
        self.description = description
        self.color = color

class Edge:
    def __init__(self, source, target, label, color):
        self.source = source
        self.target = target
        self.label = label
        self.color = color

# def visualize_knowledge_graph(kg: KnowledgeGraph):
#     dot = Digraph(comment="Knowledge Graph")
#
#     # Add nodes
#     for node in kg.nodes:
#         dot.node(str(node.id), node.description, color=node.color)
#
#     # Add edges
#     for edge in kg.edges:
#         dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)
#
#     # Render the graph
#     dot.render("knowledge_graph.gv", view=True)


def format_dict(d):
    # Initialize an empty list to store formatted items
    formatted_items = []

    # Iterate through all key-value pairs
    for key, value in d.items():
        # Format key-value pairs with a colon and space, adding quotes for string values
        formatted_item = f"{key}: '{value}'" if isinstance(value, str) else f"{key}: {value}"
        formatted_items.append(formatted_item)

    # Join all formatted items with a comma and a space
    formatted_string = ", ".join(formatted_items)

    # Add curly braces to mimic a dictionary
    formatted_string = f"{{{formatted_string}}}"

    return formatted_string


def append_uuid_to_variable_names(variable_mapping):
    unique_variable_mapping = {}
    for original_name in variable_mapping.values():
        unique_name = f"{original_name}_{uuid.uuid4().hex}"
        unique_variable_mapping[original_name] = unique_name
    return unique_variable_mapping


# Update the functions to use the unique variable names
def create_node_variable_mapping(nodes):
    mapping = {}
    for node in nodes:
        variable_name = f"{node['category']}{node['id']}".lower()
        mapping[node['id']] = variable_name
    return mapping


def create_edge_variable_mapping(edges):
    mapping = {}
    for edge in edges:
        # Construct a unique identifier for the edge
        variable_name = f"edge{edge['source']}to{edge['target']}".lower()
        mapping[(edge['source'], edge['target'])] = variable_name
    return mapping
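How the helpers chain together when building Cypher statements (a sketch; the node dict mirrors the KnowledgeGraph schema in graph.py):

```python
nodes = [{"id": 1, "category": "person", "description": "John Doe", "memory_type": "semantic"}]

base = create_node_variable_mapping(nodes)    # {1: 'person1'}
unique = append_uuid_to_variable_names(base)  # {'person1': 'person1_<32-char hex>'}
print(format_dict({"name": "John Doe", "age": 42}))
# -> {name: 'John Doe', age: 42}
```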
@@ -4,7 +4,7 @@ from io import BytesIO
 import os, sys
 # Add the parent directory to sys.path
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB
+from ..vectordb.vectordb import PineconeVectorDB, WeaviateVectorDB
 import sqlalchemy as sa
 logging.basicConfig(level=logging.INFO)
 import marvin
@@ -13,13 +13,13 @@ from dotenv import load_dotenv
 from langchain.document_loaders import PyPDFLoader
 from langchain.retrievers import WeaviateHybridSearchRetriever
 from weaviate.gql.get import HybridFusion
-from models.sessions import Session
-from models.testset import TestSet
-from models.testoutput import TestOutput
-from models.metadatas import MetaDatas
-from models.operation import Operation
+from ..models.sessions import Session
+from ..models.testset import TestSet
+from ..models.testoutput import TestOutput
+from ..models.metadatas import MetaDatas
+from ..models.operation import Operation
 from sqlalchemy.orm import sessionmaker
-from database.database import engine
+from ..database.database import engine
 load_dotenv()
 from typing import Optional
 import time
@@ -1,7 +1,7 @@
 from langchain.document_loaders import PyPDFLoader
 import sys, os
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from shared.chunk_strategy import ChunkStrategy
+from ..shared.chunk_strategy import ChunkStrategy
 import re
 def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_overlap=None):
level_4/main.py | 490
@@ -9,23 +9,14 @@ from langchain.chains import GraphCypherQAChain
 from langchain.chat_models import ChatOpenAI
 # from marvin import ai_classifier
 # marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")
-DEFAULT_PRESET = "promethai_chat"
-preset_options = [DEFAULT_PRESET]
-import questionary
-PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
-
-
-def create_config_dir():
-    if not os.path.exists(PROMETHAI_DIR):
-        os.makedirs(PROMETHAI_DIR, exist_ok=True)
-
-    folders = ["personas", "humans", "archival", "agents"]
-    for folder in folders:
-        if not os.path.exists(os.path.join(PROMETHAI_DIR, folder)):
-            os.makedirs(os.path.join(PROMETHAI_DIR, folder))
+from cognitive_architecture.models.sessions import Session
+from cognitive_architecture.models.testset import TestSet
+from cognitive_architecture.models.testoutput import TestOutput
+from cognitive_architecture.models.metadatas import MetaDatas
+from cognitive_architecture.models.operation import Operation
+from cognitive_architecture.models.docs import DocsModel
+from cognitive_architecture.models.memory import MemoryModel
 from pathlib import Path
@@ -40,14 +31,8 @@ import uuid
 from graphviz import Digraph

-load_dotenv()
-
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
-
-txt_path = "dune.txt"
-
+from cognitive_architecture.database.database_crud import session_scope
+from cognitive_architecture.database.database import AsyncSessionLocal
 import openai
 import instructor
@ -57,333 +42,188 @@ import instructor
|
||||||
instructor.patch()
|
instructor.patch()
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import List
|
from typing import List
|
||||||
|
DEFAULT_PRESET = "promethai_chat"
|
||||||
|
preset_options = [DEFAULT_PRESET]
|
||||||
|
import questionary
|
||||||
|
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
class Node(BaseModel):
|
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
||||||
id: int
|
from cognitive_architecture.config import Config
|
||||||
description: str
|
|
||||||
category: str
|
config = Config()
|
||||||
color: str ="blue"
|
config.load()
|
||||||
memory_type: str
|
|
||||||
|
print(config.model)
|
||||||
|
print(config.openai_key)
|
||||||
|
|
||||||
|
|
||||||
|
import logging
|
||||||
class Edge(BaseModel):
|
|
||||||
source: int
|
|
||||||
target: int
|
|
||||||
description: str
|
|
||||||
color: str= "blue"
|
|
||||||
|
|
||||||
|
|
||||||
class KnowledgeGraph(BaseModel):
|
|
||||||
nodes: List[Node] = Field(..., default_factory=list)
|
|
||||||
edges: List[Edge] = Field(..., default_factory=list)
|
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
def generate_graph(input) -> KnowledgeGraph:
|
|
||||||
return openai.ChatCompletion.create(
|
|
||||||
model="gpt-4-1106-preview",
|
|
||||||
messages=[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": f"""Use the given format to extract information from the following input: {input}. """,
|
|
||||||
|
|
||||||
},
|
|
||||||
{ "role":"system", "content": """You are a top-tier algorithm
|
|
||||||
designed for extracting information in structured formats to build a knowledge graph.
|
|
||||||
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
|
|
||||||
- The aim is to achieve simplicity and clarity in the
|
|
||||||
knowledge graph, making it accessible for a vast audience.
|
|
||||||
## 2. Labeling Nodes
|
|
||||||
- **Consistency**: Ensure you use basic or elementary types for node labels.
|
|
||||||
- For example, when you identify an entity representing a person,
|
|
||||||
always label it as **"person"**.
|
|
||||||
Avoid using more specific terms like "mathematician" or "scientist".
|
|
||||||
- Include event, entity, time, or action nodes to the category.
|
|
||||||
- Classify the memory type as episodic or semantic.
|
|
||||||
- **Node IDs**: Never utilize integers as node IDs.
|
|
||||||
Node IDs should be names or human-readable identifiers found in the text.
|
|
||||||
## 3. Handling Numerical Data and Dates
|
|
||||||
- Numerical data, like age or other related information,
|
|
||||||
should be incorporated as attributes or properties of the respective nodes.
|
|
||||||
- **No Separate Nodes for Dates/Numbers**:
|
|
||||||
Do not create separate nodes for dates or numerical values.
|
|
||||||
Always attach them as attributes or properties of nodes.
|
|
||||||
- **Property Format**: Properties must be in a key-value format.
|
|
||||||
- **Quotation Marks**: Never use escaped single or double quotes within property values.
|
|
||||||
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
|
|
||||||
## 4. Coreference Resolution
|
|
||||||
- **Maintain Entity Consistency**:
|
|
||||||
When extracting entities, it's vital to ensure consistency.
|
|
||||||
If an entity, such as "John Doe", is mentioned multiple times
|
|
||||||
in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
|
|
||||||
always use the most complete identifier for that entity throughout the knowledge graph.
|
|
||||||
In this example, use "John Doe" as the entity ID.
|
|
||||||
Remember, the knowledge graph should be coherent and easily understandable,
|
|
||||||
so maintaining consistency in entity references is crucial.
|
|
||||||
## 5. Strict Compliance
|
|
||||||
Adhere to the rules strictly. Non-compliance will result in termination."""}
|
|
||||||
],
|
|
||||||
response_model=KnowledgeGraph,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def execute_cypher_query(query: str):
|
|
||||||
graph_ = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="pleaseletmein")
|
|
||||||
graph_.query(query)
|
|
||||||
# This is a placeholder for the logic that will execute the Cypher query
|
|
||||||
# You would replace this with the actual logic to run the query in your Neo4j database
|
|
||||||
print(query)
|
|
||||||
|
|
||||||
#Execute Cypher queries to create the user and memory components if they don't exist
|
|
||||||
#
|
|
||||||
# graph.query(
|
|
||||||
# f"""
|
|
||||||
# // Ensure the User node exists
|
|
||||||
# MERGE (user:User {{ userId: {user} }})
|
|
||||||
#
|
|
||||||
# // Ensure the SemanticMemory node exists
|
|
||||||
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
|
|
||||||
# MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
|
|
||||||
#
|
|
||||||
# // Ensure the EpisodicMemory node exists
|
|
||||||
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
|
|
||||||
# MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
|
|
||||||
#
|
|
||||||
# // Ensure the Buffer node exists
|
|
||||||
# MERGE (buffer:Buffer {{ userId: {user} }})
|
|
||||||
# MERGE (user)-[:HAS_BUFFER]->(buffer)
|
|
||||||
# """
|
|
||||||
# )
|
|
||||||
#
|
|
||||||
# # Execute Cypher queries to create the cognitive components in the graph
|
|
||||||
# graph.query(
|
|
||||||
# f"""
|
|
||||||
# // Parsing the query into components and linking them to the user and memory components
|
|
||||||
# MERGE (user:User {{ userId: {user} }})
|
|
||||||
# MERGE (semantic:SemanticMemory {{ userId: {user} }})
|
|
||||||
# MERGE (episodic:EpisodicMemory {{ userId: {user} }})
|
|
||||||
# MERGE (buffer:Buffer {{ userId: {user} }})
|
|
||||||
#
|
|
||||||
# CREATE (action1:Event {{ description: 'take a walk', location: 'forest' }})
|
|
||||||
# CREATE (action2:Event {{ description: 'get information', source: 'book' }})
|
|
||||||
# CREATE (time:TimeContext {{ description: 'in the afternoon' }})
|
|
||||||
#
|
|
||||||
# WITH user, semantic, episodic, buffer, action1, action2, time
|
|
||||||
# CREATE (knowledge:Knowledge {{ content: 'information from a book' }})
|
|
||||||
# CREATE (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
|
|
||||||
# CREATE (episodic)-[:HAS_EVENT]->(action1)
|
|
||||||
# CREATE (episodic)-[:HAS_EVENT]->(action2)
|
|
||||||
# CREATE (episodic)-[:HAS_TIME_CONTEXT]->(time)
|
|
||||||
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action1)
|
|
||||||
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action2)
|
|
||||||
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(time)
|
|
||||||
# """
|
|
||||||
# )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
from sqlalchemy.future import select
|
||||||
|
|
||||||
async def get_vectordb_namespace(session: AsyncSession, user_id: str):
    try:
        result = await session.execute(
            select(MemoryModel.id).where(MemoryModel.user_id == user_id).order_by(MemoryModel.created_at.desc()).limit(1)
        )
        namespace = result.scalar_one_or_none()
        return namespace
    except Exception as e:
        logging.error(f"An error occurred while retrieving the Vectordb_namespace: {str(e)}")
        return None


# async def retrieve_job_by_id(session, user_id, job_id):
#     try:
#         result = await session.execute(
#             session.query(Session.id)
#             .filter_by(user_id=user_id, id=job_id)
#             .order_by(Session.created_at)
#         )
#         return result.scalar_one_or_none()
#     except Exception as e:
#         logging.error(f"An error occurred while retrieving the job: {str(e)}")
#         return None


async def update_document_vectordb_namespace(postgres_session: AsyncSession, user_id: str, namespace: str = None):
    """
    Update the Document node with the Vectordb_namespace for the given user. If the namespace is not provided,
    it will be retrieved from the PostgreSQL database.

    Args:
        postgres_session (AsyncSession): The async session for connecting to the PostgreSQL database.
        user_id (str): The user's unique identifier.
        namespace (str, optional): The Vectordb_namespace. If None, it will be retrieved from the database.

    Returns:
        The result of the update operation or None if an error occurred.
    """
    vectordb_namespace = namespace

    # Retrieve namespace from the database if not provided
    if vectordb_namespace is None:
        vectordb_namespace = await get_vectordb_namespace(postgres_session, user_id)
        if not vectordb_namespace:
            logging.error("Vectordb_namespace could not be retrieved.")
            return None

    # Update the Document node in Neo4j with the namespace
    # (update_document_node_with_namespace is assumed to be defined elsewhere in the project)
    update_result = update_document_node_with_namespace(user_id, vectordb_namespace)
    return update_result


# NOTE: these Node/Edge classes appear unused; visualize_knowledge_graph consumes the
# KnowledgeGraph Pydantic models returned by generate_graph.
class Node:
    def __init__(self, id, description, color):
        self.id = id
        self.description = description
        self.color = color


class Edge:
    def __init__(self, source, target, label, color):
        self.source = source
        self.target = target
        self.label = label
        self.color = color


def visualize_knowledge_graph(kg: KnowledgeGraph):
    dot = Digraph(comment="Knowledge Graph")

    # Add nodes
    for node in kg.nodes:
        dot.node(str(node.id), node.description, color=node.color)

    # Add edges
    for edge in kg.edges:
        dot.edge(str(edge.source), str(edge.target), label=edge.description, color=edge.color)

    # Render the graph
    dot.render("knowledge_graph.gv", view=True)


def create_base_queries_from_user(user_id: str):
    # Create the user and memory components if they don't exist
    user_memory_cypher = f"""
    MERGE (user:User {{userId: '{user_id}'}})
    MERGE (semantic:SemanticMemory {{userId: '{user_id}'}})
    MERGE (episodic:EpisodicMemory {{userId: '{user_id}'}})
    MERGE (buffer:Buffer {{userId: '{user_id}'}})
    MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
    MERGE (user)-[:HAS_EPISODIC_MEMORY]->(episodic)
    MERGE (user)-[:HAS_BUFFER]->(buffer)
    """
    return user_memory_cypher


# Function to append a UUID4 to the variable names to ensure uniqueness
def append_uuid_to_variable_names(variable_mapping):
    unique_variable_mapping = {}
    for original_name in variable_mapping.values():
        unique_name = f"{original_name}_{uuid.uuid4().hex}"
        unique_variable_mapping[original_name] = unique_name
    return unique_variable_mapping


# Update the functions to use the unique variable names
def create_node_variable_mapping(nodes):
    mapping = {}
    for node in nodes:
        variable_name = f"{node['category']}{node['id']}".lower()
        mapping[node['id']] = variable_name
    return mapping


def create_edge_variable_mapping(edges):
    mapping = {}
    for edge in edges:
        # Construct a unique identifier for the edge
        variable_name = f"edge{edge['source']}to{edge['target']}".lower()
        mapping[(edge['source'], edge['target'])] = variable_name
    return mapping

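# A minimal sketch of what update_document_node_with_namespace could look like; the
# real helper lives elsewhere in the project, so the Cypher and relationship name
# below are assumptions for illustration only.
def update_document_node_with_namespace_sketch(user_id: str, vectordb_namespace: str) -> str:
    # Attach the vector-store namespace to the user's Document node(s).
    cypher = f"""
    MATCH (user:User {{userId: '{user_id}'}})-[:HAS_DOCUMENT]->(document:Document)
    SET document.vectordbNamespace = '{vectordb_namespace}'
    RETURN document
    """
    return cypher
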
def format_dict(d):
    # Initialize an empty list to store formatted items
    formatted_items = []

    # Iterate through all key-value pairs
    for key, value in d.items():
        # Format key-value pairs with a colon and space, adding quotes for string values
        formatted_item = f"{key}: '{value}'" if isinstance(value, str) else f"{key}: {value}"
        formatted_items.append(formatted_item)

    # Join all formatted items with a comma and a space
    formatted_string = ", ".join(formatted_items)

    # Add curly braces to mimic a dictionary
    formatted_string = f"{{{formatted_string}}}"

    return formatted_string

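# Illustration (hypothetical input): format_dict({'description': 'take a walk', 'location': 'forest'})
# returns "{description: 'take a walk', location: 'forest'}", i.e. Cypher-style property syntax
# with unquoted keys, which is why a plain dict repr would not work here.
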
# Update the function to generate Cypher CREATE statements for nodes with unique variable names
def generate_create_statements_for_nodes_with_uuid(nodes, unique_mapping):
    # NOTE: relies on the module-level node_variable_mapping built in the __main__ block below.
    create_statements = []
    for node in nodes:
        original_variable_name = node_variable_mapping[node['id']]
        unique_variable_name = unique_mapping[original_variable_name]
        node_label = node['category'].capitalize()
        properties = {k: v for k, v in node.items() if k not in ['id', 'category']}
        try:
            properties = format_dict(properties)
        except Exception:
            # Fall back to the raw dict if formatting fails.
            pass
        create_statements.append(f"CREATE ({unique_variable_name}:{node_label} {properties})")
    return create_statements
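# For a hypothetical node {'id': 1, 'category': 'person', 'description': 'John Doe',
# 'memory_type': 'semantic'} mapped person1 -> person1_<hex>, this emits:
# CREATE (person1_<hex>:Person {description: 'John Doe', memory_type: 'semantic'})
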

# Update the function to generate Cypher CREATE statements for edges with unique variable names
def generate_create_statements_for_edges_with_uuid(edges, unique_mapping):
    create_statements = []
    with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
    create_statements.append(with_statement)

    for edge in edges:
        # print("HERE IS THE EDGE", edge)
        source_variable = unique_mapping[node_variable_mapping[edge['source']]]
        target_variable = unique_mapping[node_variable_mapping[edge['target']]]
        relationship = edge['description'].replace(" ", "_").upper()
        create_statements.append(f"CREATE ({source_variable})-[:{relationship}]->({target_variable})")
    return create_statements

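# For a hypothetical edge {'source': 1, 'target': 2, 'description': 'walked in'}, this emits:
# CREATE (person1_<hex>)-[:WALKED_IN]->(location2_<hex>)
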
# Update the function to generate Cypher CREATE statements for memory type relationships with unique variable names
def generate_memory_type_relationships_with_uuid_and_time_context(nodes, unique_mapping):
    create_statements = []
    # Cypher needs an explicit WITH to carry variables across statement groups when
    # everything is concatenated into a single script.
    with_statement = f"WITH {', '.join(unique_mapping.values())}, user, semantic, episodic, buffer"
    create_statements.append(with_statement)

    # Loop through each node and create relationships based on memory_type
    for node in nodes:
        original_variable_name = node_variable_mapping[node['id']]
        unique_variable_name = unique_mapping[original_variable_name]
        if node['memory_type'] == 'semantic':
            create_statements.append(f"CREATE (semantic)-[:HAS_KNOWLEDGE]->({unique_variable_name})")
        elif node['memory_type'] == 'episodic':
            create_statements.append(f"CREATE (episodic)-[:HAS_EVENT]->({unique_variable_name})")

        if node['category'] == 'time':
            create_statements.append(f"CREATE (buffer)-[:HAS_TIME_CONTEXT]->({unique_variable_name})")

        # Assuming buffer holds all actions and times
        # if node['category'] in ['action', 'time']:
        create_statements.append(f"CREATE (buffer)-[:CURRENTLY_HOLDING]->({unique_variable_name})")

    return create_statements


# Main execution logic
if __name__ == "__main__":
    user_id = "User1"
    query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"

    # Generate the knowledge graph from the user input
    knowledge_graph = generate_graph(query_input)
    visualize_knowledge_graph(knowledge_graph)
    # out = knowledge_graph.dict()
    # print(out)

    # Reuse the generated graph rather than calling generate_graph again with the same input
    graph: KnowledgeGraph = knowledge_graph
    graph_dic = graph.dict()

    node_variable_mapping = create_node_variable_mapping(graph_dic['nodes'])
    edge_variable_mapping = create_edge_variable_mapping(graph_dic['edges'])
    # Create unique variable names for each node
    unique_node_variable_mapping = append_uuid_to_variable_names(node_variable_mapping)
    unique_edge_variable_mapping = append_uuid_to_variable_names(edge_variable_mapping)
    create_nodes_statements = generate_create_statements_for_nodes_with_uuid(graph_dic['nodes'], unique_node_variable_mapping)
    create_edges_statements = generate_create_statements_for_edges_with_uuid(graph_dic['edges'], unique_node_variable_mapping)

    memory_type_statements_with_uuid_and_time_context = generate_memory_type_relationships_with_uuid_and_time_context(
        graph_dic['nodes'], unique_node_variable_mapping)

    # Combine all statements
    cypher_statements = [create_base_queries_from_user(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
    cypher_statements_joined = "\n".join(cypher_statements)

    execute_cypher_query(cypher_statements_joined)

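# To run the block above end-to-end (stated assumptions, not verified here): a Neo4j
# instance must be reachable at bolt://localhost:7687 with the credentials used in
# execute_cypher_query, generate_graph needs access to its LLM backend, and Graphviz
# must be installed for visualize_knowledge_graph to render the output.
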
# Translate the KnowledgeGraph into Cypher queries

# Make document summary in Semantic Memory
# Document summary links to a Namespace in Vector Store
# Categorize document types in Semantic Memory
# Make a spine classifier that retrieves the relevant document namespaces from Vector Store
# Connect document summary to chunks in Weaviate vector store

# print(cypher_query)
#
# # Execute the Cypher queries to create the graph in Neo4j
# execute_cypher_query(cypher_query)
# # Refresh the graph schema
# graph.refresh_schema()
# # Print the schema to the console
# print(graph.schema)


# bartleby_summary = {
#     "document_category": "Classic Literature",
#     "title": "Bartleby, the Scrivener",
#     "summary": (
#         "Bartleby, the Scrivener: A Story of Wall Street' is a short story by Herman Melville "
#         "that tells the tale of Bartleby, a scrivener, or copyist, who works for a Manhattan "
#         "lawyer. Initially, Bartleby is a competent and industrious worker. However, one day, "
#         "when asked to proofread a document, he responds with what becomes his constant refrain "
#         "to any request: 'I would prefer not to.' As the story progresses, Bartleby becomes "
#         "increasingly passive, refusing not just work but also food and eventually life itself, "
#         "as he spirals into a state of passive resistance. The lawyer, the narrator of the story, "
#         "is both fascinated and frustrated by Bartleby's behavior. Despite attempts to understand "
#         "and help him, Bartleby remains an enigmatic figure, his motives and thoughts unexplained. "
#         "He is eventually evicted from the office and later found dead in a prison yard, having "
#         "preferred not to live. The story is a meditation on the themes of isolation, societal "
#         "obligation, and the inexplicable nature of human behavior."
#     )
# }
# rs = create_document_node_cypher(bartleby_summary, user_id)
#
# parameters = {
#     'user_id': user_id,
#     'title': bartleby_summary['title'],
#     'summary': bartleby_summary['summary'],
#     'document_category': bartleby_summary['document_category']
# }
#
# execute_cypher_query(rs, parameters)


# async def main():
#     user_id = "User1"
#
#     async with session_scope(AsyncSessionLocal()) as session:
#         await update_document_vectordb_namespace(session, user_id)
#
#     # print(rs)
#
# if __name__ == "__main__":
#     import asyncio
#
#     asyncio.run(main())
#
# # config = Config()
# # config.load()
# #
# # print(config.model)
# # print(config.openai_key)


async def main():
    from cognitive_architecture.graph_database.graph import Neo4jGraphDB

    # Example initialization (replace with your actual connection details)
    neo4j_graph_db = Neo4jGraphDB(url='bolt://localhost:7687', username='neo4j', password='pleaseletmein')

    # Generate the Cypher query for a specific user
    user_id = 'user123'  # Replace with the actual user ID
    cypher_query = neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id)

    # Execute the generated Cypher query
    result = neo4j_graph_db.query(cypher_query)

    # async with session_scope(AsyncSessionLocal()) as session:
    #     await update_document_vectordb_namespace(session, user_id)

    # print(rs)


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())