Add the functions needed for the architecture (public memory) and regular user flows, refactor and reformat existing code, add boilerplate for the job-loading logic, and extend the search functionality.

This commit is contained in:
Vasilije 2023-12-27 23:56:07 +01:00
parent 118613c484
commit 2928f51343
37 changed files with 1747 additions and 215 deletions

Binary file not shown.

View file

@ -12,3 +12,7 @@ COG_ARCH_DIR = cognitive_architecture
GRAPH_DB_URL =
GRAPH_DB_PW =
GRAPH_DB_USER =
AWS_ACCESS_KEY_ID =
AWS_SECRET_ACCESS_KEY =
QDRANT_API_KEY =
QDRANT_API_URL =

View file

@ -24,6 +24,7 @@ RUN apt-get update -q && \
curl \
zip \
jq \
libgl1-mesa-glx \
netcat-traditional && \
pip install poetry && \
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \

153
api.py
View file

@ -95,6 +95,36 @@ async def add_memory(
content={"response": {"error": str(e)}}, status_code=503
)
@app.post("/add-architecture-public-memory", response_model=dict)
async def add_architecture_public_memory(
payload: Payload,
# files: List[UploadFile] = File(...),
):
try:
logging.info(" Adding to Memory ")
decoded_payload = payload.payload
async with session_scope(session=AsyncSessionLocal()) as session:
from main import load_documents_to_vectorstore
if 'content' in decoded_payload and decoded_payload['content'] is not None:
content = decoded_payload['content']
else:
content = None
user_id = 'system_user'
loader_settings = {
"format": "PDF",
"source": "DEVICE",
"path": [".data"]
}
output = await load_documents_to_vectorstore(session, user_id=user_id, content=content, loader_settings=loader_settings)
return JSONResponse(content={"response": output}, status_code=200)
except Exception as e:
return JSONResponse(
content={"response": {"error": str(e)}}, status_code=503
)
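For reference, a minimal sketch of calling the new endpoint, assuming the service runs locally on port 8000 and that the request body wraps its fields under a top-level "payload" key (as the handler's payload.payload access suggests):

import requests

# The content field is optional; when omitted the handler falls back to loading PDFs from ".data"
response = requests.post(
    "http://localhost:8000/add-architecture-public-memory",
    json={"payload": {"content": None}},
)
print(response.status_code, response.json())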
@app.post("/user-query-to-graph")
async def user_query_to_graph(payload: Payload):
try:
@ -120,22 +150,26 @@ async def document_to_graph_db(payload: Payload):
settings_for_loader = decoded_payload['settings']
else:
settings_for_loader = None
if 'memory_type' in decoded_payload and decoded_payload['memory_type'] is not None:
memory_type = decoded_payload['memory_type']
else:
memory_type = None
async with session_scope(session=AsyncSessionLocal()) as session:
result = await add_documents_to_graph_db(session =session, user_id = decoded_payload['user_id'], loader_settings =settings_for_loader)
result = await add_documents_to_graph_db(session =session, user_id = decoded_payload['user_id'], document_memory_types =memory_type)
return result
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/user-query-processor")
async def user_query_processor(payload: Payload):
@app.post("/cognitive-context-enrichment")
async def cognitive_context_enrichment(payload: Payload):
try:
decoded_payload = payload.payload
# Execute the query - replace this with the actual execution method
async with session_scope(session=AsyncSessionLocal()) as session:
# Assuming you have a method in Neo4jGraphDB to execute the query
result = await user_context_enrichment(session, decoded_payload['user_id'], decoded_payload['query'])
result = await user_context_enrichment(session, user_id = decoded_payload['user_id'], query= decoded_payload['query'], generative_response=decoded_payload['generative_response'], memory_type= decoded_payload['memory_type'])
return JSONResponse(content={"response": result}, status_code=200)
except Exception as e:
@ -163,12 +197,121 @@ async def user_query_classfier(payload: Payload):
async def drop_db(payload: Payload):
try:
decoded_payload = payload.payload
return JSONResponse(content={"response": "dropped"}, status_code=200)
if decoded_payload['operation'] == 'drop':
if os.environ.get('AWS_ENV') == 'dev':
host = os.environ.get('POSTGRES_HOST')
username = os.environ.get('POSTGRES_USER')
password = os.environ.get('POSTGRES_PASSWORD')
database_name = os.environ.get('POSTGRES_DB')
else:
pass
from cognitive_architecture.database.postgres import drop_database, create_admin_engine
engine = create_admin_engine(username, password, host, database_name)
drop_database(engine)
return JSONResponse(content={"response": "DB dropped"}, status_code=200)
else:
if os.environ.get('AWS_ENV') == 'dev':
host = os.environ.get('POSTGRES_HOST')
username = os.environ.get('POSTGRES_USER')
password = os.environ.get('POSTGRES_PASSWORD')
database_name = os.environ.get('POSTGRES_DB')
else:
pass
from cognitive_architecture.database.postgres import create_database, create_admin_engine
engine = create_admin_engine(username, password, host, database_name)
create_database(engine)
return JSONResponse(content={"response": " DB created"}, status_code=200)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/create-public-memory")
async def create_public_memory(payload: Payload):
try:
decoded_payload = payload.payload
if 'user_id' in decoded_payload and decoded_payload['user_id'] is not None:
user_id = decoded_payload['user_id']
else:
user_id = None
if 'labels' in decoded_payload and decoded_payload['labels'] is not None:
labels = decoded_payload['labels']
else:
labels = None
if 'topic' in decoded_payload and decoded_payload['topic'] is not None:
topic = decoded_payload['topic']
else:
topic = None
# Import the implementation from main under a distinct name so the endpoint does not call itself
from main import create_public_memory as create_public_memory_impl
result = await create_public_memory_impl(user_id=user_id, labels=labels, topic=topic)
return JSONResponse(content={"response": result}, status_code=200)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/attach-user-to-public-memory")
async def attach_user_to_public_memory(payload: Payload):
try:
decoded_payload = payload.payload
if 'topic' in decoded_payload and decoded_payload['topic'] is not None:
topic = decoded_payload['topic']
else:
topic = None
if 'labels' in decoded_payload and decoded_payload['labels'] is not None:
labels = decoded_payload['labels']
else:
labels = ['sr']
# Execute the query - replace this with the actual execution method
async with session_scope(session=AsyncSessionLocal()) as session:
from main import attach_user_to_memory, create_public_memory
# Assuming you have a method in Neo4jGraphDB to execute the query
await create_public_memory(user_id=decoded_payload['user_id'], topic=topic, labels=labels)
result = await attach_user_to_memory( user_id = decoded_payload['user_id'], topic=topic, labels=labels)
return JSONResponse(content={"response": result}, status_code=200)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/unlink-user-from-public-memory")
async def unlink_user_from_public_memory(payload: Payload):
try:
decoded_payload = payload.payload
if 'topic' in decoded_payload and decoded_payload['topic'] is not None:
topic = decoded_payload['topic']
else:
topic = None
# Execute the query - replace this with the actual execution method
async with session_scope(session=AsyncSessionLocal()) as session:
from main import unlink_user_from_memory
# Assuming you have a method in Neo4jGraphDB to execute the query
result = await unlink_user_from_memory( user_id = decoded_payload['user_id'], topic=topic)
return JSONResponse(content={"response": result}, status_code=200)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
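Taken together, the three endpoints above form a small public-memory lifecycle. A minimal sketch of exercising them end to end, assuming a local deployment and a hypothetical user id:

import requests

BASE_URL = "http://localhost:8000"  # assumed local deployment

def call(path, payload):
    # Every endpoint expects its fields wrapped under a top-level "payload" key
    return requests.post(f"{BASE_URL}{path}", json={"payload": payload}).json()

# Create (or find) a public memory node, attach the user to it, then detach the user again
print(call("/create-public-memory", {"user_id": "user_123", "labels": ["sr"], "topic": "PublicMemory"}))
print(call("/attach-user-to-public-memory", {"user_id": "user_123", "labels": ["sr"], "topic": "PublicMemory"}))
print(call("/unlink-user-from-public-memory", {"user_id": "user_123", "topic": "PublicMemory"}))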
def start_api_server(host: str = "0.0.0.0", port: int = 8000):
"""
Start the API server using uvicorn.

View file

@ -73,11 +73,11 @@ def classify_retrieval():
# classify documents according to type of document
async def classify_call(query, context, document_types):
async def classify_call(query, document_summaries):
llm = ChatOpenAI(temperature=0, model=config.model)
prompt_classify = ChatPromptTemplate.from_template(
"""You are a classifier. Determine what document types are relevant : {query}, Context: {context}, Book_types:{document_types}"""
"""You are a classifier. Determine what document are relevant for the given query: {query}, Document summaries:{document_summaries}"""
)
json_structure = [{
"name": "classifier",
@ -85,20 +85,20 @@ async def classify_call(query, context, document_types):
"parameters": {
"type": "object",
"properties": {
"DocumentCategory": {
"DocumentSummary": {
"type": "string",
"description": "The classification of documents in groups such as legal, medical, etc."
"description": "The summary of the document and the topic it deals with."
}
}, "required": ["DocumentCategory"] }
}, "required": ["DocumentSummary"] }
}]
chain_filter = prompt_classify | llm.bind(function_call={"name": "classifier"}, functions=json_structure)
classifier_output = await chain_filter.ainvoke({"query": query, "context": context, "document_types": document_types})
classifier_output = await chain_filter.ainvoke({"query": query, "document_summaries": document_summaries})
arguments_str = classifier_output.additional_kwargs['function_call']['arguments']
print("This is the arguments string", arguments_str)
arguments_dict = json.loads(arguments_str)
classfier_value = arguments_dict.get('DocumentCategory', None)
classfier_value = arguments_dict.get('DocumentSummary', None)
print("This is the classifier value", classfier_value)

View file

@ -71,6 +71,19 @@ def create_database(username, password, host, db_name):
engine.dispose()
def drop_database(username, password, host, db_name):
engine = create_admin_engine(username, password, host)
connection = engine.raw_connection()
connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
cursor = connection.cursor()
cursor.execute(f"DROP DATABASE IF EXISTS {db_name}")
cursor.close()
connection.close()
engine.dispose()
print(f"Database {db_name} dropped successfully.")
def create_tables(engine):
Base.metadata.create_all(bind=engine)
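A minimal sketch of using the new drop helper directly, based on the signature in this hunk and with credentials read from the same environment variables the drop-db endpoint uses (note this irreversibly drops the database):

import os
from cognitive_architecture.database.postgres import drop_database

drop_database(
    username=os.environ["POSTGRES_USER"],
    password=os.environ["POSTGRES_PASSWORD"],
    host=os.environ["POSTGRES_HOST"],
    db_name=os.environ["POSTGRES_DB"],
)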

View file

@ -5,6 +5,9 @@
import logging
import os
from neo4j import AsyncSession
from neo4j.exceptions import Neo4jError
print(os.getcwd())
import networkx as nx
@ -25,8 +28,10 @@ from abc import ABC, abstractmethod
# Allows the return of Pydantic model rather than raw JSON
from pydantic import BaseModel, Field
from typing import List
from ...utils import format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, create_node_variable_mapping
from typing import List, Dict, Optional
from ...utils import format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, \
create_node_variable_mapping, get_unsumarized_vector_db_namespace
DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
@ -113,7 +118,8 @@ class KnowledgeGraph(BaseModel):
nodes: List[Node] = Field(..., default_factory=list)
edges: List[Edge] = Field(..., default_factory=list)
class GraphQLQuery(BaseModel):
query: str
#
def generate_graph(input) -> KnowledgeGraph:
@ -185,13 +191,25 @@ class AbstractGraphDB(ABC):
class Neo4jGraphDB(AbstractGraphDB):
def __init__(self, url, username, password):
self.graph = Neo4jGraph(url=url, username=username, password=password)
# self.graph = Neo4jGraph(url=url, username=username, password=password)
from neo4j import GraphDatabase
self.driver = GraphDatabase.driver(url, auth=(username, password))
self.openai_key = config.openai_key
def close(self):
# Method to close the Neo4j driver instance
self.driver.close()
def query(self, query, params=None):
return self.graph.query(query, params)
try:
with self.driver.session() as session:
result = session.run(query, params).data()
return result
except Exception as e:
logging.error(f"An error occurred while executing the query: {e}")
raise e
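With the Langchain Neo4jGraph wrapper swapped for a raw driver, all reads and writes now go through the class's own query() helper. A minimal sketch, assuming the connection details come from the GRAPH_DB_* environment variables added to the .env template above:

import os

graph_db = Neo4jGraphDB(
    url=os.environ.get("GRAPH_DB_URL"),
    username=os.environ.get("GRAPH_DB_USER"),
    password=os.environ.get("GRAPH_DB_PW"),
)
# Parameterised query through the shared helper
users = graph_db.query(
    "MATCH (u:User {userId: $user_id}) RETURN u",
    params={"user_id": "user_123"},
)
graph_db.close()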
@ -209,7 +227,7 @@ class Neo4jGraphDB(AbstractGraphDB):
return user_memory_cypher
def user_query_to_edges_and_nodes(self, input: str) ->KnowledgeGraph:
return openai.ChatCompletion.create(
return aclient.chat.completions.create(
model=config.model,
messages=[
{
@ -255,6 +273,22 @@ class Neo4jGraphDB(AbstractGraphDB):
response_model=KnowledgeGraph,
)
def cypher_statement_correcting(self, input: str) ->str:
return aclient.chat.completions.create(
model=config.model,
messages=[
{
"role": "user",
"content": f"""Check the cypher query for syntax issues, and fix any if found and return it as is: {input}. """,
},
{"role": "system", "content": """You are a top-tier algorithm
designed for checking cypher queries for neo4j graph databases. You have to return input provided to you as is"""}
],
response_model=GraphQLQuery,
)
def generate_create_statements_for_nodes_with_uuid(self, nodes, unique_mapping, base_node_mapping):
create_statements = []
for node in nodes:
@ -324,6 +358,10 @@ class Neo4jGraphDB(AbstractGraphDB):
# # Combine all statements
cypher_statements = [self.create_base_cognitive_architecture(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
cypher_statements_joined = "\n".join(cypher_statements)
logging.info("User Cypher Query raw: %s", cypher_statements_joined)
# corrected_cypher_statements = self.cypher_statement_correcting(input = cypher_statements_joined)
# logging.info("User Cypher Query: %s", corrected_cypher_statements.query)
# return corrected_cypher_statements.query
return cypher_statements_joined
@ -334,7 +372,7 @@ class Neo4jGraphDB(AbstractGraphDB):
def delete_all_user_memories(self, user_id):
try:
# Check if the user exists
user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
user_exists = self.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
if not user_exists:
return f"No user found with ID: {user_id}"
@ -348,7 +386,7 @@ class Neo4jGraphDB(AbstractGraphDB):
MATCH (user)-[:HAS_BUFFER]->(buffer)
DETACH DELETE semantic, episodic, buffer
"""
self.graph.query(delete_query)
self.query(delete_query)
return f"All memories deleted for user ID: {user_id}"
except Exception as e:
return f"An error occurred: {str(e)}"
@ -356,7 +394,7 @@ class Neo4jGraphDB(AbstractGraphDB):
def delete_specific_memory_type(self, user_id, memory_type):
try:
# Check if the user exists
user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
user_exists = self.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
if not user_exists:
return f"No user found with ID: {user_id}"
@ -369,7 +407,7 @@ class Neo4jGraphDB(AbstractGraphDB):
MATCH (user:User {{userId: '{user_id}'}})-[:HAS_{memory_type.upper()}]->(memory)
DETACH DELETE memory
"""
self.graph.query(delete_query)
self.query(delete_query)
return f"{memory_type} deleted for user ID: {user_id}"
except Exception as e:
return f"An error occurred: {str(e)}"
@ -396,6 +434,15 @@ class Neo4jGraphDB(AbstractGraphDB):
RETURN item
"""
return self.query(query, params={"user_id": user_id})
def retrieve_public_memory(self, user_id: str):
query = """
MATCH (user:User {userId: $user_id})-[:HAS_PUBLIC_MEMORY]->(public:PublicMemory)
MATCH (public)-[:HAS_DOCUMENT]->(document)
RETURN document
"""
return self.query(query, params={"user_id": user_id})
def generate_graph_semantic_memory_document_summary(self, document_summary : str, unique_graphdb_mapping_values: dict, document_namespace: str):
""" This function takes a document and generates a document summary in Semantic Memory"""
create_statements = []
@ -429,71 +476,142 @@ class Neo4jGraphDB(AbstractGraphDB):
return create_statements
async def get_document_categories(self, user_id: str):
async def get_memory_linked_document_summaries(self, user_id: str, memory_type: str = "PublicMemory"):
"""
Retrieve a list of categories for all documents associated with a given user.
Retrieve a list of summaries for all documents associated with a given memory type for a user.
This function executes a Cypher query in a Neo4j database to fetch the summaries
of all 'Document' nodes that are linked to the specified memory node ('SemanticMemory' or 'PublicMemory') of the user.
Parameters:
- session (AsyncSession): The database session for executing the query.
- user_id (str): The unique identifier of the user.
Args:
user_id (str): The unique identifier of the user.
memory_type (str): The type of memory node ('SemanticMemory' or 'PublicMemory').
Returns:
- List[str]: A list of document categories associated with the user.
List[str]: A list of document summaries associated with the memory type for the user.
Raises:
- Exception: If an error occurs during the database query execution.
Exception: If an error occurs during the database query execution.
"""
if memory_type == "PublicMemory":
relationship = "HAS_PUBLIC_MEMORY"
elif memory_type == "SemanticMemory":
relationship = "HAS_SEMANTIC_MEMORY"
try:
query = f'''
MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
RETURN document.documentCategory AS category
MATCH (user:User {{userId: '{user_id}'}})-[:{relationship}]->(memory:{memory_type})-[:HAS_DOCUMENT]->(document:Document)
RETURN document.summary AS summary
'''
logging.info(f"Generated Cypher query: {query}")
return query
result = self.query(query)
logging.info("Result: ", result)
return [record.get("summary", "No summary available") for record in result]
except Exception as e:
logging.error(f"An error occurred while retrieving document categories: {str(e)}")
logging.error(f"An error occurred while retrieving document summary: {str(e)}")
return None
async def get_document_ids(self, user_id: str, category: str):
# async def get_document_categories(self, user_id: str):
# """
# Retrieve a list of categories for all documents associated with a given user.
#
# This function executes a Cypher query in a Neo4j database to fetch the categories
# of all 'Document' nodes that are linked to the 'SemanticMemory' node of the specified user.
#
# Parameters:
# - session (AsyncSession): The database session for executing the query.
# - user_id (str): The unique identifier of the user.
#
# Returns:
# - List[str]: A list of document categories associated with the user.
#
# Raises:
# - Exception: If an error occurs during the database query execution.
# """
# try:
# query = f'''
# MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
# RETURN document.documentCategory AS category
# '''
# logging.info(f"Generated Cypher query: {query}")
# return query
#
# except Exception as e:
# logging.error(f"An error occurred while retrieving document categories: {str(e)}")
# return None
# async def get_document_ids(self, user_id: str, category: str):
# """
# Retrieve a list of document IDs for a specific category associated with a given user.
#
# This function executes a Cypher query in a Neo4j database to fetch the IDs
# of all 'Document' nodes in a specific category that are linked to the 'SemanticMemory' node of the specified user.
#
# Parameters:
# - user_id (str): The unique identifier of the user.
# - category (str): The specific document category to filter by.
#
# Returns:
# - List[str]: A list of document IDs in the specified category associated with the user.
#
# Raises:
# - Exception: If an error occurs during the database query execution.
# """
# try:
# query = f'''
# MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{documentCategory: '{category}'}})
# RETURN document.d_id AS d_id
# '''
# logging.info(f"Generated Cypher query: {query}")
# return query
#
# except Exception as e:
# logging.error(f"An error occurred while retrieving document IDs: {str(e)}")
# return None
async def get_memory_linked_document_ids(self, user_id: str, summary: str, memory_type: str = "PUBLIC"):
"""
Retrieve a list of document IDs for a specific category associated with a given user.
Retrieve a list of document IDs matching a specific summary for a given memory type and user.
This function executes a Cypher query in a Neo4j database to fetch the IDs
of all 'Document' nodes with the given summary that are linked to the specified memory node of the user.
Parameters:
- user_id (str): The unique identifier of the user.
- category (str): The specific document category to filter by.
Args:
user_id (str): The unique identifier of the user.
summary (str): The specific document summary to filter by.
memory_type (str): The type of memory node ('SemanticMemory' or 'PublicMemory').
Returns:
- List[str]: A list of document IDs in the specified category associated with the user.
List[str]: A list of document IDs matching the specified summary for the given memory type and user.
Raises:
- Exception: If an error occurs during the database query execution.
Exception: If an error occurs during the database query execution.
"""
if memory_type == "PublicMemory":
relationship = "HAS_PUBLIC_MEMORY"
elif memory_type == "SemanticMemory":
relationship = "HAS_SEMANTIC_MEMORY"
try:
query = f'''
MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{documentCategory: '{category}'}})
MATCH (user:User {{userId: '{user_id}'}})-[:{relationship}]->(memory:{memory_type})-[:HAS_DOCUMENT]->(document:Document {{summary: '{summary}'}})
RETURN document.d_id AS d_id
'''
logging.info(f"Generated Cypher query: {query}")
return query
result = self.query(query)
return [record["d_id"] for record in result]
except Exception as e:
logging.error(f"An error occurred while retrieving document IDs: {str(e)}")
return None
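A sketch of chaining the two retrieval helpers above, assuming graph_db is an initialised Neo4jGraphDB (as in the earlier sketch) and the user id is hypothetical:

import asyncio

async def find_public_document_ids(graph_db, user_id: str):
    # Fetch the summaries attached to the user's public memory ...
    summaries = await graph_db.get_memory_linked_document_summaries(user_id, memory_type="PublicMemory")
    if not summaries:
        return []
    # ... then resolve the first summary back to its document ids
    return await graph_db.get_memory_linked_document_ids(user_id, summaries[0], memory_type="PublicMemory")

# asyncio.run(find_public_document_ids(graph_db, "user_123"))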
def create_document_node_cypher(self, document_summary: dict, user_id: str) -> str:
def create_document_node_cypher(self, document_summary: dict, user_id: str,
memory_type: str = "PublicMemory",public_memory_id:str=None) -> str:
"""
Generate a Cypher query to create a Document node linked to a SemanticMemory node for a user.
Generate a Cypher query to create a Document node. If the memory type is 'SemanticMemory',
link it to a SemanticMemory node for a user. If the memory type is 'PublicMemory',
only link the Document node to the PublicMemory node.
Parameters:
- document_summary (dict): A dictionary containing the document's category, title, and summary.
- document_summary (dict): A dictionary containing the document's category, title, summary, and document ID.
- user_id (str): The unique identifier for the user.
- memory_type (str): The type of memory node to link ("SemanticMemory" or "PublicMemory"). Default is "PublicMemory".
Returns:
- str: A Cypher query string with parameters.
@ -509,21 +627,34 @@ class Neo4jGraphDB(AbstractGraphDB):
raise ValueError("The document_summary dictionary is missing required keys.")
if not isinstance(user_id, str) or not user_id:
raise ValueError("The user_id must be a non-empty string.")
if memory_type not in ["SemanticMemory", "PublicMemory"]:
raise ValueError("The memory_type must be either 'SemanticMemory' or 'PublicMemory'.")
# Escape single quotes in the document summary data (if not using parameters)
# Escape single quotes in the document summary data
title = document_summary['Title'].replace("'", "\\'")
summary = document_summary['Summary'].replace("'", "\\'")
document_category = document_summary['DocumentCategory'].replace("'", "\\'")
d_id = document_summary['d_id'].replace("'", "\\'")
# Generate the Cypher query using parameters
cypher_query = f'''
memory_node_type = "SemanticMemory" if memory_type == "SemanticMemory" else "PublicMemory"
user_memory_link = ''
if memory_type == "SemanticMemory":
user_memory_link = f'''
// Ensure the User node exists
MERGE (user:User {{ userId: '{user_id}' }})
MERGE (memory:SemanticMemory {{ userId: '{user_id}' }})
MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(memory)
'''
elif memory_type == "PublicMemory":
logging.info(f"Public memory id: {public_memory_id}")
user_memory_link = f'''
// Match the existing PublicMemory node by its memoryId
MATCH (memory:PublicMemory {{ memoryId: {public_memory_id} }})
'''
// Ensure the SemanticMemory node exists and is connected to the User
MERGE (semantic:SemanticMemory {{ userId: '{user_id}' }})
MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)
cypher_query = f'''
{user_memory_link}
// Create the Document node with its properties
CREATE (document:Document {{
@ -533,21 +664,42 @@ class Neo4jGraphDB(AbstractGraphDB):
d_id: '{d_id}'
}})
// Link the Document node to the SemanticMemory node
CREATE (semantic)-[:HAS_DOCUMENT]->(document)
// Link the Document node to the {memory_node_type} node
MERGE (memory)-[:HAS_DOCUMENT]->(document)
'''
logging.info(f"Generated Cypher query: {cypher_query}")
return cypher_query
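For illustration, a sketch of building and running the document-creation query against a public memory node; the document summary values are hypothetical, and public_memory_id is assumed to be an id previously returned by create_memory_node:

document_summary = {
    "Title": "Belgrade Modernism",
    "Summary": "An overview of post-war architecture in Belgrade.",
    "DocumentCategory": "architecture",
    "d_id": "doc-0001",
}

cypher = graph_db.create_document_node_cypher(
    document_summary,
    user_id="user_123",
    memory_type="PublicMemory",
    public_memory_id=42,  # assumed to come from create_memory_node
)
graph_db.query(cypher)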
def update_document_node_with_namespace(self, user_id: str, vectordb_namespace: str, document_id: str):
# Generate the Cypher query
def update_document_node_with_db_ids(self, vectordb_namespace: str, document_id: str, user_id: str = None):
"""
Update the namespace of a Document node in the database. The document can be linked
either to a SemanticMemory node (if a user ID is provided) or to a PublicMemory node.
Parameters:
- vectordb_namespace (str): The namespace to set for the vectordb.
- document_id (str): The unique identifier of the document.
- user_id (str, optional): The unique identifier for the user. Default is None.
Returns:
- str: A Cypher query string to perform the update.
"""
if user_id:
# Update for a document linked to a SemanticMemory node
cypher_query = f'''
MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{d_id: '{document_id}'}})
MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{d_id: '{document_id}'}})
SET document.vectordbNamespace = '{vectordb_namespace}'
RETURN document
'''
else:
# Update for a document linked to a PublicMemory node
cypher_query = f'''
MATCH (:PublicMemory)-[:HAS_DOCUMENT]->(document:Document {{d_id: '{document_id}'}})
SET document.vectordbNamespace = '{vectordb_namespace}'
RETURN document
'''
return cypher_query
@ -581,6 +733,278 @@ class Neo4jGraphDB(AbstractGraphDB):
logging.error(f"An error occurred while retrieving namespaces by document category: {str(e)}")
return None
async def create_memory_node(self, labels, topic=None):
"""
Create or find a memory node of the specified type with labels and a description.
Args:
labels (List[str]): A list of labels for the node.
topic (str, optional): The type of memory node to create or find. Defaults to "PublicMemory".
Returns:
int: The ID of the created or found memory node.
Raises:
ValueError: If input parameters are invalid.
Neo4jError: If an error occurs during the database operation.
"""
if topic is None:
topic = "PublicMemory"
# Prepare labels as a string
label_list = ', '.join(f"'{label}'" for label in labels)
# Cypher query to find or create the memory node with the given description and labels
memory_cypher = f"""
MERGE (memory:{topic} {{description: '{topic}', label: [{label_list}]}})
SET memory.memoryId = ID(memory)
RETURN id(memory) AS memoryId
"""
try:
result = self.query(memory_cypher)
# Assuming the result is a list of records, where each record contains 'memoryId'
memory_id = result[0]['memoryId'] if result else None
self.close()
return memory_id
except Neo4jError as e:
logging.error(f"Error creating or finding memory node: {e}")
raise
def link_user_to_public(self, user_id: str, public_property_value: str, public_property_name: str = 'name',
relationship_type: str = 'HAS_PUBLIC'):
if not user_id or not public_property_value:
raise ValueError("Valid User ID and Public property value are required for linking.")
try:
link_cypher = f"""
MATCH (user:User {{userId: '{user_id}'}})
MATCH (public:Public {{{public_property_name}: '{public_property_value}'}})
MERGE (user)-[:{relationship_type}]->(public)
"""
self.query(link_cypher)
except Neo4jError as e:
logging.error(f"Error linking Public node to user: {e}")
raise
def delete_memory_node(self, memory_id: int, topic: str) -> None:
if not memory_id or not topic:
raise ValueError("Memory ID and Topic are required for deletion.")
try:
delete_cypher = f"""
MATCH ({topic.lower()}: {topic}) WHERE id({topic.lower()}) = {memory_id}
DETACH DELETE {topic.lower()}
"""
logging.info("Delete Cypher Query: %s", delete_cypher)
self.query(delete_cypher)
except Neo4jError as e:
logging.error(f"Error deleting {topic} memory node: {e}")
raise
def unlink_memory_from_user(self, memory_id: int, user_id: str, topic: str='PublicMemory') -> None:
"""
Unlink a memory node from a user node.
Parameters:
- memory_id (int): The internal ID of the memory node.
- user_id (str): The unique identifier for the user.
- topic (str): The type of memory node to unlink ("SemanticMemory" or "PublicMemory").
Raises:
- ValueError: If any required data is missing or invalid.
"""
if not user_id or not isinstance(memory_id, int):
raise ValueError("Valid User ID and Memory ID are required for unlinking.")
if topic not in ["SemanticMemory", "PublicMemory"]:
raise ValueError("The memory_type must be either 'SemanticMemory' or 'PublicMemory'.")
relationship_type = "HAS_SEMANTIC_MEMORY" if topic == "SemanticMemory" else "HAS_PUBLIC_MEMORY"
try:
unlink_cypher = f"""
MATCH (user:User {{userId: '{user_id}'}})-[r:{relationship_type}]->(memory:{topic}) WHERE id(memory) = {memory_id}
DELETE r
"""
self.query(unlink_cypher)
except Neo4jError as e:
logging.error(f"Error unlinking {topic} from user: {e}")
raise
def link_public_memory_to_user(self, memory_id, user_id):
# Link an existing Public Memory node to a User node
link_cypher = f"""
MATCH (user:User {{userId: '{user_id}'}})
MATCH (publicMemory:PublicMemory) WHERE id(publicMemory) = {memory_id}
MERGE (user)-[:HAS_PUBLIC_MEMORY]->(publicMemory)
"""
self.query(link_cypher)
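A sketch of bootstrapping a public memory node and attaching it to a user, assuming graph_db is an initialised Neo4jGraphDB; note that create_memory_node closes the driver when it finishes, so a fresh instance may be needed before the follow-up link call:

import asyncio

async def bootstrap_public_memory(graph_db, user_id: str):
    memory_id = await graph_db.create_memory_node(labels=["sr"], topic="PublicMemory")
    if memory_id is not None:
        graph_db.link_public_memory_to_user(memory_id, user_id)
    return memory_id

# asyncio.run(bootstrap_public_memory(graph_db, "user_123"))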
def retrieve_node_id_for_memory_type(self, topic: str = 'SemanticMemory'):
link_cypher = f""" MATCH(publicMemory: {topic})
RETURN
id(publicMemory)
AS
memoryId """
node_ids = self.query(link_cypher)
return node_ids
# def retrieve_linked_memory_for_user(self, user_id: str, topic: str, relationship_type: str = 'HAS_MEMORY'):
# query = f"""
# MATCH (user:User {{userId: $user_id}})-[:{relationship_type}]->({topic.lower()}:{topic})
# RETURN {topic.lower()}
# """
# return self.query(query, params={"user_id": user_id})
#
#
# async def link_memory_to_user(self, memory_id: int, user_id: str, relationship_type: str = 'HAS_MEMORY') -> None:
# """
# Link a memory node to a user with a specified relationship type.
#
# Args:
# memory_id (int): The ID of the memory node.
# user_id (str): The user ID to link the memory to.
# relationship_type (str): The type of relationship.
#
# Raises:
# ValueError: If input parameters are invalid.
# Neo4jError: If an error occurs during the database operation.
# """
# if not user_id or not memory_id:
# raise ValueError("User ID and Memory ID are required for linking.")
#
# try:
# link_cypher = f"""
# MATCH (user:User {{userId: '{user_id}'}})
# MATCH (memory) WHERE id(memory) = {memory_id}
# MERGE (user)-[:{relationship_type}]->(memory)
# """
# await self.query(link_cypher)
# except Neo4jError as e:
# logging.error(f"Error linking memory to user: {e}")
# raise
#
# async def delete_memory_node(self, memory_id: int, memory_type: str) -> None:
# """
# Delete a memory node of a specified type.
#
# Args:
# memory_id (int): The ID of the memory node to delete.
# memory_type (str): The type of the memory node.
#
# Raises:
# ValueError: If input parameters are invalid.
# Neo4jError: If an error occurs during the database operation.
# """
# if not memory_id or not memory_type:
# raise ValueError("Memory ID and Memory Type are required for deletion.")
#
# try:
# delete_cypher = f"""
# MATCH (memory:{memory_type}) WHERE id(memory) = {memory_id}
# DETACH DELETE memory
# """
# await self.query(delete_cypher)
# except Neo4jError as e:
# logging.error(f"Error deleting memory node: {e}")
# raise
#
# async def unlink_memory_from_user(self, memory_id: int, user_id: str, relationship_type: str = 'HAS_MEMORY') -> None:
# """
# Unlink a memory node from a user.
#
# Args:
# memory_id (int): The ID of the memory node.
# user_id (str): The user ID to unlink from the memory.
# relationship_type (str): The type of relationship.
#
# Raises:
# ValueError: If input parameters are invalid.
# Neo4jError: If an error occurs during the database operation.
# """
# if not user_id or not memory_id:
# raise ValueError("User ID and Memory ID are required for unlinking.")
#
# try:
# unlink_cypher = f"""
# MATCH (user:User {{userId: '{user_id}'}})-[r:{relationship_type}]->(memory) WHERE id(memory) = {memory_id}
# DELETE r
# """
# await self.query(unlink_cypher)
# except Neo4jError as e:
# logging.error(f"Error unlinking memory from user: {e}")
# raise
#
#
# def create_public_memory(self, labels, topic=None):
# if topic is None:
# topic = "SerbianArchitecture"
# topicMemoryId = topic + "MemoryId"
# # Create an independent Architecture Memory node with countries as properties
# label_list = ', '.join(f"'{label}'" for label in labels) # Prepare countries list as a string
# architecture_memory_cypher = f"""
# CREATE ({topic.lower()}:{topic} {{description: '{topic}', label: [{label_list}]}})
# RETURN id({topic.lower()}) AS {topicMemoryId}
# """
# return self.query(architecture_memory_cypher)
#
# def link_public_memory_to_user(self, public_memory_id, user_id):
# # Link an existing Public Memory node to a User node
# link_cypher = f"""
# MATCH (user:User {{userId: '{user_id}'}})
# MATCH (publicMemory:PublicMemory) WHERE id(publicMemory) = {public_memory_id}
# MERGE (user)-[:HAS_PUBLIC_MEMORY]->(publicMemory)
# """
# self.query(link_cypher)
#
# def link_public_memory_to_architecture(self, public_memory_id):
# # Link the Public Memory node to the Architecture Memory node
# link_cypher = f"""
# MATCH (publicMemory:PublicMemory) WHERE id(publicMemory) = {public_memory_id}
# MATCH (architecture:Architecture {{description: 'Architecture'}})
# MERGE (publicMemory)-[:INCLUDES]->(architecture)
# """
# self.query(link_cypher)
#
# def delete_public_memory(self, public_memory_id):
# # Delete a Public Memory node by its ID
# delete_cypher = f"""
# MATCH (publicMemory:PublicMemory) WHERE id(publicMemory) = {public_memory_id}
# DETACH DELETE publicMemory
# """
# self.query(delete_cypher)
#
# def delete_architecture_memory(self, architecture_memory_id):
# # Delete an Architecture Memory node by its ID
# delete_cypher = f"""
# MATCH (architecture:Architecture) WHERE id(architecture) = {architecture_memory_id}
# DETACH DELETE architecture
# """
# self.query(delete_cypher)
#
# def unlink_public_memory_from_user(self, public_memory_id, user_id):
# # Unlink a Public Memory node from a User node
# unlink_cypher = f"""
# MATCH (user:User {{userId: '{user_id}'}})-[r:HAS_PUBLIC_MEMORY]->(publicMemory:PublicMemory) WHERE id(publicMemory) = {public_memory_id}
# DELETE r
# """
# self.query(unlink_cypher)
#
# def unlink_public_memory_from_architecture(self, public_memory_id):
# # Unlink the Public Memory node from the Architecture Memory node
# unlink_cypher = f"""
# MATCH (publicMemory:PublicMemory)-[r:INCLUDES]->(architecture:Architecture) WHERE id(publicMemory) = {public_memory_id}
# DELETE r
# """
# self.query(unlink_cypher)
class NetworkXGraphDB:
def __init__(self, filename='networkx_graph.pkl'):

View file

@ -12,6 +12,7 @@ class DocsModel(Base):
operation_id = Column(String, ForeignKey('operations.id'), index=True)
doc_name = Column(String, nullable=True)
graph_summary = Column(Boolean, nullable=True)
memory_category = Column(String, nullable=True)
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, onupdate=datetime.utcnow)

View file

@ -12,6 +12,7 @@ class MemoryModel(Base):
user_id = Column(String, ForeignKey('users.id'), index=True)
operation_id = Column(String, ForeignKey('operations.id'), index=True)
memory_name = Column(String, nullable=True)
memory_category = Column(String, nullable=True)
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, onupdate=datetime.utcnow)
methods_list = Column(String , nullable=True)

View file

@ -1,7 +1,4 @@
from langchain.document_loaders import PyPDFLoader
import sys, os
from cognitive_architecture.shared.chunk_strategy import ChunkStrategy
from cognitive_architecture.database.vectordb.chunkers.chunk_strategy import ChunkStrategy
import re
def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_overlap=None):
@ -32,10 +29,13 @@ def vanilla_chunker(source_data, chunk_size=100, chunk_overlap=20):
chunk_overlap=chunk_overlap,
length_function=len
)
try:
# try:
# pages = text_splitter.create_documents([source_data])
# except:
# try:
pages = text_splitter.create_documents([source_data])
except:
pages = text_splitter.create_documents(source_data.content)
# except:
# pages = text_splitter.create_documents(source_data.content)
# pages = source_data.load_and_split()
return pages
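A small usage sketch for the chunker, assuming chunk_data accepts the "VANILLA" strategy string that the loaders pass in:

sample_text = "A long body of text that should be split into overlapping chunks. " * 20
pages = chunk_data(chunk_strategy="VANILLA", source_data=sample_text, chunk_size=100, chunk_overlap=20)
print(len(pages), "chunks produced")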

View file

@ -0,0 +1,151 @@
import os
import requests
import json
from .embeddings import Embeddings
from .vector_db import VectorDB
from .response import Response
class CogneeManager:
def __init__(self, embeddings: Embeddings = None,
vector_db: VectorDB = None,
vector_db_key: str = None,
embedding_api_key: str = None,
webhook_url: str = None,
lines_per_batch: int = 1000,
webhook_key: str = None,
document_id: str = None,
chunk_validation_url: str = None,
internal_api_key: str = "test123",
base_url="http://localhost:8000"):
self.embeddings = embeddings if embeddings else Embeddings()
self.vector_db = vector_db if vector_db else VectorDB()
self.webhook_url = webhook_url
self.lines_per_batch = lines_per_batch
self.webhook_key = webhook_key
self.document_id = document_id
self.chunk_validation_url = chunk_validation_url
self.vector_db_key = vector_db_key
self.embeddings_api_key = embedding_api_key
self.internal_api_key = internal_api_key
self.base_url = base_url
def serialize(self):
data = {
'EmbeddingsMetadata': json.dumps(self.embeddings.serialize()),
'VectorDBMetadata': json.dumps(self.vector_db.serialize()),
'WebhookURL': self.webhook_url,
'LinesPerBatch': self.lines_per_batch,
'DocumentID': self.document_id,
'ChunkValidationURL': self.chunk_validation_url,
}
return {k: v for k, v in data.items() if v is not None}
def upload(self, file_paths: list[str], base_url=None):
if base_url:
url = base_url + "/jobs"
else:
url = self.base_url + "/jobs"
data = self.serialize()
headers = self.generate_headers()
multipart_form_data = [('file', (os.path.basename(filepath), open(filepath, 'rb'), 'application/octet-stream'))
for filepath in file_paths]
print(f"embedding {len(file_paths)} documents at {url}")
response = requests.post(url, files=multipart_form_data, headers=headers, stream=True, data=data)
if response.status_code == 500:
print(response.text)
return Response(error=response.text, status_code=response.status_code)
response_json = response.json()
if response.status_code >= 400 and response.status_code < 500:
print(f"Error: {response_json['error']}")
return Response.from_json(response_json, response.status_code)
def get_job_statuses(self, job_ids: list[int], base_url=None):
if base_url:
url = base_url + "/jobs/status"
else:
url = self.base_url + "/jobs/status"
headers = {
"Authorization": self.internal_api_key,
}
data = {
'JobIDs': job_ids
}
print(f"retrieving job statuses for {len(job_ids)} jobs at {url}")
response = requests.post(url, headers=headers, json=data)
if response.status_code == 500:
print(response.text)
return Response(error=response.text, status_code=response.status_code)
response_json = response.json()
if response.status_code >= 400 and response.status_code < 500:
print(f"Error: {response_json['error']}")
return Response.from_json(response_json, response.status_code)
def embed(self, filepath, base_url=None):
if base_url:
url = base_url + "/embed"
else:
url = self.base_url + "/embed"
data = self.serialize()
headers = self.generate_headers()
files = {
'SourceData': open(filepath, 'rb')
}
print(f"embedding document at file path {filepath} at {url}")
response = requests.post(url, headers=headers, data=data, files=files)
if response.status_code == 500:
print(response.text)
return Response(error=response.text, status_code=response.status_code)
response_json = response.json()
if response.status_code >= 400 and response.status_code < 500:
print(f"Error: {response_json['error']}")
return Response.from_json(response_json, response.status_code)
def get_job_status(self, job_id, base_url=None):
if base_url:
url = base_url + "/jobs/" + str(job_id) + "/status"
else:
url = self.base_url + "/jobs/" + str(job_id) + "/status"
headers = {
"Authorization": self.internal_api_key,
}
print(f"retrieving job status for job {job_id} at {url}")
response = requests.get(url, headers=headers)
if response.status_code == 500:
print(response.text)
return Response(error=response.text, status_code=response.status_code)
response_json = response.json()
if response.status_code >= 400 and response.status_code < 500:
print(f"Error: {response_json['error']}")
return Response.from_json(response_json, response.status_code)
def generate_headers(self):
headers = {
"Authorization": self.internal_api_key,
"X-EmbeddingAPI-Key": self.embeddings_api_key,
"X-VectorDB-Key": self.vector_db_key,
"X-Webhook-Key": self.webhook_key
}
return {k: v for k, v in headers.items() if v is not None}
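A minimal sketch of driving the new job-loading client end to end, assuming a jobs service is reachable at the default base_url, the API keys are placeholders, and CogneeManager is importable from wherever this module lives:

manager = CogneeManager(
    embedding_api_key="sk-placeholder",   # hypothetical keys
    vector_db_key="qdrant-placeholder",
    base_url="http://localhost:8000",
)

upload_response = manager.upload(["./data/report.pdf"])
if upload_response.successful_uploads:
    job_ids = [job.job_id for job in upload_response.successful_uploads]
    print(manager.get_job_statuses(job_ids))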

View file

@ -0,0 +1,29 @@
from .embeddings_type import EmbeddingsType
from ..chunkers.chunk_strategy import ChunkStrategy
class Embeddings:
def __init__(self, embeddings_type: EmbeddingsType = EmbeddingsType.OPEN_AI,
chunk_size: int = 256,
chunk_overlap: int = 128,
chunk_strategy: ChunkStrategy = ChunkStrategy.EXACT,
docker_image: str = None,
hugging_face_model_name: str = None):
self.embeddings_type = embeddings_type
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap
self.chunk_strategy = chunk_strategy
self.docker_image = docker_image
self.hugging_face_model_name = hugging_face_model_name
def serialize(self):
data = {
'embeddings_type': self.embeddings_type.name if self.embeddings_type else None,
'chunk_size': self.chunk_size,
'chunk_overlap': self.chunk_overlap,
'chunk_strategy': self.chunk_strategy.name if self.chunk_strategy else None,
'docker_image': self.docker_image,
'hugging_face_model_name': self.hugging_face_model_name
}
return {k: v for k, v in data.items() if v is not None}

View file

@ -0,0 +1,8 @@
from enum import Enum
class EmbeddingsType(Enum):
OPEN_AI = 'open_ai'
COHERE = 'cohere'
SELF_HOSTED = 'self_hosted'
HUGGING_FACE = 'hugging_face'
IMAGE = 'image'

View file

@ -0,0 +1,18 @@
class Job:
def __init__(self, job_id, job_status=None, filename=None):
self.job_id = job_id
self.job_status = job_status
self.filename = filename
def __str__(self):
attributes = []
if self.job_id is not None:
attributes.append(f"job_id: {self.job_id}")
if self.job_status is not None:
attributes.append(f"job_status: {self.job_status}")
if self.filename is not None:
attributes.append(f"filename: {self.filename}")
return "Job(" + ", ".join(attributes) + ")"
def __repr__(self):
return self.__str__()

View file

@ -4,6 +4,7 @@ import os
import sys
from cognitive_architecture.database.vectordb.chunkers.chunkers import chunk_data
from cognitive_architecture.shared.language_processing import translate_text, detect_language
from langchain.document_loaders import UnstructuredURLLoader
from langchain.document_loaders import DirectoryLoader
@ -11,14 +12,45 @@ import logging
import os
from langchain.document_loaders import TextLoader
import requests
async def _document_loader( observation: str, loader_settings: dict):
async def fetch_pdf_content(file_url):
response = requests.get(file_url)
pdf_stream = BytesIO(response.content)
with fitz.open(stream=pdf_stream, filetype='pdf') as doc:
return "".join(page.get_text() for page in doc)
async def fetch_text_content(file_url):
loader = UnstructuredURLLoader(urls=[file_url])
return loader.load()
async def process_content(content, metadata, loader_strategy, chunk_size, chunk_overlap):
pages = chunk_data(chunk_strategy=loader_strategy, source_data=content, chunk_size=chunk_size,
chunk_overlap=chunk_overlap)
if metadata is None:
metadata = {"metadata": "None"}
chunk_count = 0
for chunk in pages:
chunk_count += 1
# Copy the metadata so each chunk gets its own dict rather than sharing one mutable object
chunk.metadata = dict(metadata)
chunk.metadata["chunk_count"] = chunk_count
if detect_language(pages) != "en":
logging.info("Translating Page")
for page in pages:
if detect_language(page.page_content) != "en":
page.page_content = translate_text(page.page_content)
return pages
async def _document_loader(observation: str, loader_settings: dict):
document_format = loader_settings.get("format", "text")
loader_strategy = loader_settings.get("strategy", "VANILLA")
chunk_size = loader_settings.get("chunk_size", 500)
chunk_overlap = loader_settings.get("chunk_overlap", 20)
logging.info("LOADER SETTINGS %s", loader_settings)
list_of_docs = loader_settings["path"]
@ -27,55 +59,146 @@ async def _document_loader( observation: str, loader_settings: dict):
if loader_settings.get("source") == "URL":
for file in list_of_docs:
if document_format == "PDF":
logging.info("File is %s", file)
pdf_response = requests.get(file)
pdf_stream = BytesIO(pdf_response.content)
with fitz.open(stream=pdf_stream, filetype='pdf') as doc:
file_content = ""
for page in doc:
file_content += page.get_text()
pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size,
chunk_overlap=chunk_overlap)
chunked_doc.append(pages)
content = await fetch_pdf_content(file)
elif document_format == "TEXT":
loader = UnstructuredURLLoader(urls=file)
file_content = loader.load()
pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size,
chunk_overlap=chunk_overlap)
content = await fetch_text_content(file)
else:
raise ValueError(f"Unsupported document format: {document_format}")
pages = await process_content(content, metadata=None, loader_strategy=loader_strategy, chunk_size= chunk_size, chunk_overlap= chunk_overlap)
chunked_doc.append(pages)
elif loader_settings.get("source") == "DEVICE":
if loader_settings.get("bulk_load", False) == True:
current_directory = os.getcwd()
logging.info("Current Directory: %s", current_directory)
loader = DirectoryLoader(".data", recursive=True)
if document_format == "PDF":
# loader = SimpleDirectoryReader(".data", recursive=True, exclude_hidden=True)
documents = loader.load()
pages = chunk_data(chunk_strategy=loader_strategy, source_data=str(documents), chunk_size=chunk_size,
chunk_overlap=chunk_overlap)
logging.info("Documents: %s", documents)
# pages = documents.load_and_split()
for document in documents:
# print ("Document: ", document.page_content)
pages = await process_content(content= str(document.page_content), metadata=document.metadata, loader_strategy= loader_strategy, chunk_size = chunk_size, chunk_overlap = chunk_overlap)
chunked_doc.append(pages)
elif document_format == "TEXT":
documents = loader.load()
pages = chunk_data(chunk_strategy=loader_strategy, source_data=str(documents), chunk_size=chunk_size,
chunk_overlap=chunk_overlap)
logging.info("Documents: %s", documents)
# pages = documents.load_and_split()
chunked_doc.append(pages)
else:
raise ValueError(f"Error: ")
from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader(loader_settings.get("single_document_path"))
documents= loader.load()
for document in documents:
pages = await process_content(content=str(document.page_content), metadata=document.metadata,
loader_strategy=loader_strategy, chunk_size=chunk_size,
chunk_overlap=chunk_overlap)
chunked_doc.append(pages)
else:
raise ValueError(f"Unsupported source type: {loader_settings.get('source')}")
return chunked_doc
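For reference, a sketch of calling the refactored loader with URL-based settings; the URL is a placeholder and the call assumes network access:

import asyncio

loader_settings = {
    "format": "PDF",
    "source": "URL",
    "strategy": "VANILLA",
    "chunk_size": 500,
    "chunk_overlap": 20,
    "path": ["https://example.com/sample.pdf"],  # placeholder URL
}

chunked_docs = asyncio.run(_document_loader("", loader_settings))
print(len(chunked_docs), "documents chunked")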
# async def _document_loader( observation: str, loader_settings: dict):
#
# document_format = loader_settings.get("format", "text")
# loader_strategy = loader_settings.get("strategy", "VANILLA")
# chunk_size = loader_settings.get("chunk_size", 500)
# chunk_overlap = loader_settings.get("chunk_overlap", 20)
#
#
# logging.info("LOADER SETTINGS %s", loader_settings)
#
# list_of_docs = loader_settings["path"]
# chunked_doc = []
#
# if loader_settings.get("source") == "URL":
# for file in list_of_docs:
# if document_format == "PDF":
# logging.info("File is %s", file)
# pdf_response = requests.get(file)
# pdf_stream = BytesIO(pdf_response.content)
# with fitz.open(stream=pdf_stream, filetype='pdf') as doc:
# file_content = ""
# for page in doc:
# file_content += page.get_text()
# pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size,
# chunk_overlap=chunk_overlap)
# from cognitive_architecture.shared.language_processing import translate_text,detect_language
#
# if detect_language(pages) != "en":
# logging.info("Current Directory 3")
# for page in pages:
# if detect_language(page.page_content) != "en":
# logging.info("Translating Page")
# page.page_content = translate_text(page.page_content)
#
# chunked_doc.append(pages)
#
# logging.info("Document translation complete. Proceeding...")
#
# chunked_doc.append(pages)
#
# elif document_format == "TEXT":
# loader = UnstructuredURLLoader(urls=file)
# file_content = loader.load()
# pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size,
# chunk_overlap=chunk_overlap)
#
# from cognitive_architecture.shared.language_processing import translate_text, detect_language
#
# if detect_language(pages) != "en":
# logging.info("Current Directory 3")
# for page in pages:
# if detect_language(page.page_content) != "en":
# logging.info("Translating Page")
# page.page_content = translate_text(page.page_content)
#
# chunked_doc.append(pages)
#
# logging.info("Document translation complete. Proceeding...")
#
# chunked_doc.append(pages)
#
# elif loader_settings.get("source") == "DEVICE":
#
# current_directory = os.getcwd()
# logging.info("Current Directory: %s", current_directory)
#
# loader = DirectoryLoader(".data", recursive=True)
# if document_format == "PDF":
# # loader = SimpleDirectoryReader(".data", recursive=True, exclude_hidden=True)
# documents = loader.load()
# pages = chunk_data(chunk_strategy=loader_strategy, source_data=str(documents), chunk_size=chunk_size,
# chunk_overlap=chunk_overlap)
# logging.info("Documents: %s", documents)
# from cognitive_architecture.shared.language_processing import translate_text, detect_language
#
# if detect_language(pages) != "en":
# logging.info("Current Directory 3")
# for page in pages:
# if detect_language(page.page_content) != "en":
# logging.info("Translating Page")
# page.page_content = translate_text(page.page_content)
#
# chunked_doc.append(pages)
#
# logging.info("Document translation complete. Proceeding...")
#
# # pages = documents.load_and_split()
# chunked_doc.append(pages)
#
#
# elif document_format == "TEXT":
# documents = loader.load()
# pages = chunk_data(chunk_strategy=loader_strategy, source_data=str(documents), chunk_size=chunk_size,
# chunk_overlap=chunk_overlap)
# logging.info("Documents: %s", documents)
# # pages = documents.load_and_split()
# chunked_doc.append(pages)
#
# else:
# raise ValueError(f"Error: ")
# return chunked_doc

View file

@ -0,0 +1,72 @@
from .job import Job
class Response:
def __init__(self, error=None, message=None, successful_uploads=None, failed_uploads=None,
empty_files_count=None, duplicate_files_count=None, job_id=None,
jobs=None, job_status=None, status_code=None):
self.error = error
self.message = message
self.successful_uploads = successful_uploads
self.failed_uploads = failed_uploads
self.empty_files_count = empty_files_count
self.duplicate_files_count = duplicate_files_count
self.job_id = job_id
self.jobs = jobs
self.job_status = job_status
self.status_code = status_code
@classmethod
def from_json(cls, json_dict, status_code):
successful_uploads = cls._convert_successful_uploads_to_jobs(json_dict.get('successful_uploads', None))
jobs = cls._convert_to_jobs(json_dict.get('Jobs', None))
return cls(
error=json_dict.get('error'),
message=json_dict.get('message'),
successful_uploads=successful_uploads,
failed_uploads=json_dict.get('failed_uploads'),
empty_files_count=json_dict.get('empty_files_count'),
duplicate_files_count=json_dict.get('duplicate_files_count'),
job_id=json_dict.get('JobID'),
jobs=jobs,
job_status=json_dict.get('JobStatus'),
status_code=status_code
)
@classmethod
def _convert_successful_uploads_to_jobs(cls, successful_uploads):
if not successful_uploads:
return None
return [Job(filename=key, job_id=val) for key, val in successful_uploads.items()]
@classmethod
def _convert_to_jobs(cls, jobs):
if not jobs:
return None
return [Job(job_id=job['JobID'], job_status=job['JobStatus']) for job in jobs]
def __str__(self):
attributes = []
if self.error is not None:
attributes.append(f"error: {self.error}")
if self.message is not None:
attributes.append(f"message: {self.message}")
if self.successful_uploads is not None:
attributes.append(f"successful_uploads: {str(self.successful_uploads)}")
if self.failed_uploads is not None:
attributes.append(f"failed_uploads: {self.failed_uploads}")
if self.empty_files_count is not None:
attributes.append(f"empty_files_count: {self.empty_files_count}")
if self.duplicate_files_count is not None:
attributes.append(f"duplicate_files_count: {self.duplicate_files_count}")
if self.job_id is not None:
attributes.append(f"job_id: {self.job_id}")
if self.jobs is not None:
attributes.append(f"jobs: {str(self.jobs)}")
if self.job_status is not None:
attributes.append(f"job_status: {self.job_status}")
if self.status_code is not None:
attributes.append(f"status_code: {self.status_code}")
return "Response(" + ", ".join(attributes) + ")"

View file

@ -0,0 +1,13 @@
from enum import Enum
class VectorDBType(Enum):
PINECONE = 'pinecone'
WEAVIATE = 'weaviate'
MILVUS = 'milvus'
QDRANT = 'qdrant'
DEEPLAKE = 'deeplake'
VESPA = 'vespa'
PGVECTOR = 'pgvector'
REDIS = 'redis'
LANCEDB = 'lancedb'
MONGODB = 'mongodb'

View file

@ -130,8 +130,6 @@ class WeaviateVectorDB(VectorDB):
def _stuct(self, observation, params, metadata_schema_class =None):
"""Utility function to create the document structure with optional custom fields."""
# Construct document data
document_data = {
"metadata": params,
@ -152,18 +150,20 @@ class WeaviateVectorDB(VectorDB):
if namespace is None:
namespace = self.namespace
params['user_id'] = self.user_id
logging.info("User id is %s", self.user_id)
retriever = self.init_weaviate(embeddings=OpenAIEmbeddings(),namespace = namespace, retriever_type="single_document_context")
if loader_settings:
# Assuming _document_loader returns a list of documents
documents = await _document_loader(observation, loader_settings)
logging.info("here are the docs %s", str(documents))
chunk_count = 0
for doc in documents[0]:
for doc_list in documents:
for doc in doc_list:
chunk_count += 1
params['chunk_order'] = chunk_count
# document_to_load = self._stuct(doc.page_content, params, metadata_schema_class)
# logging.info("Loading document with provided loader settings %s", str(document_to_load))
params['chunk_count'] = doc.metadata.get("chunk_count", "None")
logging.info("Loading document with provided loader settings %s", str(doc))
params['source'] = doc.metadata.get("source", "None")
logging.info("Params are %s", str(params))
retriever.add_documents([
Document(metadata=params, page_content=doc.page_content)])
else:
@ -174,16 +174,13 @@ class WeaviateVectorDB(VectorDB):
for doc in documents[0]:
chunk_count += 1
params['chunk_order'] = chunk_count
# document_to_load = self._stuct(observation, params, metadata_schema_class)
logging.info("Loading document with defautl loader settings %s", str(doc))
# logging.info("Loading document with defautl loader settings %s", str(document_to_load))
params['source'] = "User loaded"
logging.info("Loading document with default loader settings %s", str(doc))
logging.info("Params are %s", str(params))
retriever.add_documents([
Document(metadata=params, page_content=doc.page_content)])
async def fetch_memories(self, observation: str, namespace: str = None, search_type: str = 'hybrid', **kwargs):
async def fetch_memories(self, observation: str, namespace: str = None, search_type: str = 'hybrid',params=None, **kwargs):
"""
Fetch documents from weaviate.
@ -276,6 +273,33 @@ class WeaviateVectorDB(VectorDB):
).with_additional(
["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", 'distance']
).with_where(filter_object).with_limit(30)
query_output = (
base_query
# .with_hybrid(query=observation, fusion_type=HybridFusion.RELATIVE_SCORE)
.do()
)
elif search_type == 'summary_filter_by_object_name':
filter_object = {
"operator": "And",
"operands": [
{
"path": ["user_id"],
"operator": "Equal",
"valueText": self.user_id,
},
{
"path": ["doc_id"],
"operator": "Equal",
"valueText": params,
},
]
}
base_query = client.query.get(
namespace, list(list_objects_of_class(namespace, client.schema.get()))
).with_additional(
["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", 'distance']
).with_where(filter_object).with_limit(30).with_hybrid(query=observation, fusion_type=HybridFusion.RELATIVE_SCORE)
query_output = (
base_query
.do()
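As a reference for the new summary_filter_by_object_name path, a small sketch of how the user/doc filter dictionary used above could be factored out; the build_user_doc_filter helper is illustrative only:

def build_user_doc_filter(user_id: str, doc_id: str) -> dict:
    """Build the Weaviate 'where' filter that scopes a query to one user and one document."""
    return {
        "operator": "And",
        "operands": [
            {"path": ["user_id"], "operator": "Equal", "valueText": user_id},
            {"path": ["doc_id"], "operator": "Equal", "valueText": doc_id},
        ],
    }

# Sketch of use inside fetch_memories:
# filter_object = build_user_doc_filter(self.user_id, params)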

View file

@ -0,0 +1,92 @@
import boto3
from botocore.exceptions import BotoCoreError, ClientError
from langdetect import detect, LangDetectException
import iso639
from dotenv import load_dotenv
load_dotenv()
import logging
# Basic configuration of the logging system
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def detect_language(text):
"""
Detect the language of the given text and return its ISO 639-1 language code.
If the detected language is Croatian ('hr'), it maps to Serbian ('sr').
The text is trimmed to the first 100 characters for efficient processing.
Parameters:
text (str): The text for language detection.
Returns:
str: The ISO 639-1 language code of the detected language, or -1 if detection fails.
"""
# Trim the text to the first 100 characters
trimmed_text = text[:100]
try:
# Detect the language using langdetect
detected_lang_iso639_1 = detect(trimmed_text)
logging.info(f"Detected ISO 639-1 code: {detected_lang_iso639_1}")
# Special case: map 'hr' (Croatian) to 'sr' (Serbian ISO 639-2)
if detected_lang_iso639_1 == 'hr':
return 'sr'
return detected_lang_iso639_1
except LangDetectException as e:
logging.error(f"Language detection error: {e}")
except Exception as e:
logging.error(f"Unexpected error: {e}")
return -1
def translate_text(text, source_language:str='sr', target_language:str='en', region_name='eu-west-1'):
"""
Translate text from source language to target language using AWS Translate.
Parameters:
text (str): The text to be translated.
source_language (str): The source language code (e.g., 'sr' for Serbian). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
target_language (str): The target language code (e.g., 'en' for English). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
region_name (str): AWS region name.
Returns:
str: Translated text or an error message.
"""
if not text:
return "No text provided for translation."
if not source_language or not target_language:
return "Both source and target language codes are required."
try:
translate = boto3.client(service_name='translate', region_name=region_name, use_ssl=True)
result = translate.translate_text(Text=text, SourceLanguageCode=source_language, TargetLanguageCode=target_language)
return result.get('TranslatedText', 'No translation found.')
except BotoCoreError as e:
logging.info(f"BotoCoreError occurred: {e}")
return "Error with AWS Translate service configuration or request."
except ClientError as e:
logging.info(f"ClientError occurred: {e}")
return "Error with AWS client or network issue."
if __name__ == "__main__":
    # Ad-hoc manual check of the translation helper
    source_language = 'sr'
    target_language = 'en'
    text_to_translate = "Ja volim da pecam i idem na reku da šetam pored nje ponekad"
    translated_text = translate_text(text_to_translate, source_language, target_language)
    print(translated_text)
    # print(detect_language("Koliko krava ide u setnju?"))

View file

@ -7,6 +7,11 @@ from graphviz import Digraph
from sqlalchemy import or_
from sqlalchemy.orm import contains_eager
from cognitive_architecture.database.postgres.database import AsyncSessionLocal
from dotenv import load_dotenv
load_dotenv()
# from graph_database.graph import KnowledgeGraph
@ -217,10 +222,11 @@ async def get_unsumarized_vector_db_namespace(session: AsyncSession, user_id: st
operations = result.unique().scalars().all()
# Extract memory names and document names and IDs
memory_names = [memory.memory_name for op in operations for memory in op.memories]
# memory_names = [memory.memory_name for op in operations for memory in op.memories]
memory_details = [(memory.memory_name, memory.memory_category) for op in operations for memory in op.memories]
docs = [(doc.doc_name, doc.id) for op in operations for doc in op.docs]
return memory_names, docs
return memory_details, docs
# except Exception as e:
# # Handle the exception as needed
@ -264,4 +270,42 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
#
# async def main():
# user_id = "user"
#
# async with session_scope(AsyncSessionLocal()) as session:
# output = await get_unsumarized_vector_db_namespace(session, user_id)
#
# print(output)
# # await update_entity(session, DocsModel, "8cd9a022-5a7a-4af5-815a-f988415536ae", True)
# # out = await get_vectordb_namespace(session, user_id)
# # params = {
# # "version": "1.0",
# # "agreement_id": "AG123456",
# # "privacy_policy": "https://example.com/privacy",
# # "terms_of_service": "https://example.com/terms",
# # "format": "json",
# # "schema_version": "1.1",
# # "checksum": "a1b2c3d4e5f6",
# # "owner": "John Doe",
# # "license": "MIT",
# # "validity_start": "2023-08-01",
# # "validity_end": "2024-07-31",
# # }
# # loader_settings = {
# # "format": "PDF",
# # "source": "DEVICE",
# # "path": [".data"],
# # "strategy": "SUMMARY",
# # }
# # await load_documents_to_vectorstore(session, user_id, loader_settings=loader_settings)
# # await user_query_to_graph_db(session, user_id, "I walked in the forest yesterday and added to my list I need to buy some milk in the store and get a summary from a classical book i read yesterday")
# # await add_documents_to_graph_db(session, user_id, loader_settings=loader_settings)
# # await user_context_enrichment(session, user_id, query="Tell me about the book I read yesterday")
#
#
# if __name__ == "__main__":
# import asyncio
#
# asyncio.run(main())

View file

@ -251,7 +251,7 @@ class Memory:
return f"Error creating user: {str(e)}"
@staticmethod
async def handle_new_memory(user_id: str, session, job_id: str = None, memory_name: str = None):
async def handle_new_memory(user_id: str, session, job_id: str = None, memory_name: str = None, memory_category:str='PUBLIC'):
"""
Handle new memory creation associated with a user.
@ -275,6 +275,7 @@ class Memory:
user_id=user_id,
operation_id=job_id,
memory_name=memory_name,
memory_category=memory_category,
methods_list=str(["Memory", "SemanticMemory", "EpisodicMemory"]),
attributes_list=str(
[
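An illustrative call of the extended handle_new_memory signature above, assuming an open AsyncSession; all literal values below are placeholders:

async def register_public_memory(session):
    # Sketch: record a new memory operation using the category added in this commit
    await Memory.handle_new_memory(
        user_id="system_user",
        session=session,             # assumed open AsyncSession
        job_id="job-123",            # placeholder operation id
        memory_name="PUBLICMEMORY",  # placeholder namespace
        memory_category="PUBLIC",
    )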

486
main.py
View file

@ -1,4 +1,6 @@
from typing import Optional, List
from neo4j.exceptions import Neo4jError
from pydantic import BaseModel, Field
from cognitive_architecture.database.graph_database.graph import Neo4jGraphDB
from cognitive_architecture.database.postgres.models.memory import MemoryModel
@ -37,7 +39,9 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from cognitive_architecture.utils import get_document_names, generate_letter_uuid, get_memory_name_by_doc_id, get_unsumarized_vector_db_namespace, get_vectordb_namespace, get_vectordb_document_name
async def fetch_document_vectordb_namespace(session: AsyncSession, user_id: str, namespace_id:str):
async def fetch_document_vectordb_namespace(session: AsyncSession, user_id: str, namespace_id:str, doc_id:str=None):
logging.info("user id is", user_id)
memory = await Memory.create_memory(user_id, session, namespace=namespace_id, memory_label=namespace_id)
@ -63,12 +67,14 @@ async def fetch_document_vectordb_namespace(session: AsyncSession, user_id: str,
print("Available memory classes:", await memory.list_memory_classes())
result = await memory.dynamic_method_call(dynamic_memory_class, 'fetch_memories',
observation="placeholder", search_type="summary")
observation="placeholder", search_type="summary_filter_by_object_name", params=doc_id)
logging.info("Result is", result)
return result, namespace_id
async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, content:str=None, job_id:str=None, loader_settings:dict=None):
async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, content:str=None, job_id:str=None, loader_settings:dict=None, memory_type:str="PRIVATE"):
namespace_id = str(generate_letter_uuid()) + "_" + "SEMANTICMEMORY"
namespace_class = namespace_id + "_class"
@ -78,7 +84,6 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
await add_entity(session, new_user)
except:
pass
if job_id is None:
job_id = str(uuid.uuid4())
@ -92,37 +97,50 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
),
)
memory = await Memory.create_memory(user_id, session, namespace=namespace_id, job_id=job_id, memory_label=namespace_id)
if content is not None:
document_names = [content[:30]]
if loader_settings is not None:
document_names = get_document_names(loader_settings.get("path", "None"))
document_source = loader_settings.get("document_names") if isinstance(loader_settings.get("document_names"),
list) else loader_settings.get("path", "None")
logging.info("Document source is %s", document_source)
# try:
document_names = get_document_names(document_source[0])
logging.info(str(document_names))
# except:
# document_names = document_source
for doc in document_names:
from cognitive_architecture.shared.language_processing import translate_text, detect_language
# translates doc titles to English
if loader_settings is not None:
logging.info("Detecting language of document %s", doc)
loader_settings["single_document_path"]= loader_settings.get("path", "None")[0] +"/"+doc
logging.info("Document path is %s", loader_settings.get("single_document_path", "None"))
memory_category = loader_settings.get("memory_category", "PUBLIC")
if loader_settings is None:
memory_category = "CUSTOM"
if detect_language(doc) != "en":
doc_ = doc.removesuffix(".pdf").replace("-", " ")
doc_ = translate_text(doc_, "sr", "en")
else:
doc_=doc
doc_id = str(uuid.uuid4())
logging.info("Document name is %s", doc_)
await add_entity(
session,
DocsModel(
id=str(uuid.uuid4()),
id=doc_id,
operation_id=job_id,
graph_summary= False,
doc_name=doc
memory_category= memory_category,
doc_name=doc_
)
)
# Managing memory attributes
existing_user = await Memory.check_existing_user(user_id, session)
print("here is the existing user", existing_user)
await memory.manage_memory_attributes(existing_user)
params = {
"version": "1.0",
"agreement_id": "AG123456",
"privacy_policy": "https://example.com/privacy",
"terms_of_service": "https://example.com/terms",
"format": "json",
"schema_version": "1.1",
"checksum": "a1b2c3d4e5f6",
"owner": "John Doe",
"license": "MIT",
"validity_start": "2023-08-01",
"validity_end": "2024-07-31",
"doc_id":doc_id
}
print("Namespace id is %s", namespace_id)
await memory.add_dynamic_memory_class(namespace_id.lower(), namespace_id)
@ -141,10 +159,7 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
print("Available memory classes:", await memory.list_memory_classes())
result = await memory.dynamic_method_call(dynamic_memory_class, 'add_memories',
observation=content, params=params, loader_settings=loader_settings)
await update_entity(session, Operation, job_id, "SUCCESS")
# return result, namespace_id
async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_input: str):
@ -174,37 +189,120 @@ async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_inpu
return result
# async def add_documents_to_graph_db(session: AsyncSession, user_id: Optional[str] = None,
# document_memory_types: Optional[List[str]] = None):
# """ Add documents to a graph database, handling multiple memory types """
# if document_memory_types is None:
# document_memory_types = ['PUBLIC']
#
# memory_type_actions = {
# 'PUBLIC': {'topic': 'PublicMemory', 'additional_action': None},
# 'SEMANTIC': {'topic': 'SemanticMemory', 'additional_action': None}
# }
#
# try:
# memory_details, docs = await get_unsumarized_vector_db_namespace(session, user_id)
# filtered_memory_details = [detail for detail in memory_details if detail[1] in document_memory_types]
#
# neo4j_graph_db = None
# for doc in docs:
# doc_name, doc_id = doc
# try:
# classification_content = await fetch_document_vectordb_namespace(
# session, user_id, filtered_memory_details[0][0], doc_id)
# retrieval_chunks = [item['text'] for item in
# classification_content[0]['data']['Get'][filtered_memory_details[0][0]]]
# except Exception as e:
# logging.error(f"Error fetching document content: {e}")
# retrieval_chunks = ""
#
# concatenated_retrievals = ' '.join(retrieval_chunks)
# classification = await classify_documents(doc_name, document_id=doc_id, content=concatenated_retrievals)
#
# for memory_type in document_memory_types:
# if memory_type in memory_type_actions:
# if neo4j_graph_db is None:
# neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url,
# username=config.graph_database_username,
# password=config.graph_database_password)
# topic = memory_type_actions[memory_type]['topic']
# ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic=topic)
# for id in ids:
# memory_id = id.get('memoryId')
# if memory_id:
# rs = neo4j_graph_db.create_document_node_cypher(classification, user_id,
# public_memory_id=memory_id if memory_type == 'PUBLIC' else None)
# neo4j_graph_db.query(rs)
#
# if filtered_memory_details[0][1] == memory_type:
# neo4j_graph_db.update_document_node_with_db_ids(
# vectordb_namespace=filtered_memory_details[0][0],
# document_id=doc_id, user_id=user_id if memory_type != "PUBLIC" else None)
# except Exception as e:
# logging.error(f"An error occurred: {e}")
# return e
async def add_documents_to_graph_db(session: AsyncSession, user_id: str= None, loader_settings:dict=None):
async def add_documents_to_graph_db(session: AsyncSession, user_id: str= None, document_memory_types:list=None):
""""""
if document_memory_types is None:
document_memory_types = ['PUBLIC']
logging.info("Document memory types are", document_memory_types)
try:
# await update_document_vectordb_namespace(postgres_session, user_id)
memory_names, docs = await get_unsumarized_vector_db_namespace(session, user_id)
memory_details, docs = await get_unsumarized_vector_db_namespace(session, user_id)
logging.info("Docs are", docs)
for doc, memory_name in zip(docs, memory_names):
logging.info("Memory names are", memory_name)
classification_content = await fetch_document_vectordb_namespace(session, user_id, memory_name)
memory_details= [detail for detail in memory_details if detail[1] in document_memory_types]
logging.info("Memory details", memory_details)
for doc in docs:
logging.info("Memory names are", memory_details)
doc_name, doc_id = doc
logging.info("Doc id is", doc_id)
try:
classification_content = await fetch_document_vectordb_namespace(session, user_id, memory_details[0][0], doc_id)
retrieval_chunks = [item['text'] for item in
classification_content[0]['data']['Get'][memory_details[0][0]]]
logging.info("Classification content is", classification_content)
retrieval_chunks = [item['text'] for item in classification_content[0]['data']['Get'][memory_name]]
except:
classification_content = ""
retrieval_chunks = ""
# retrieval_chunks = [item['text'] for item in classification_content[0]['data']['Get'][memory_details[0]]]
# Concatenating the extracted text values
concatenated_retrievals = ' '.join(retrieval_chunks)
print(concatenated_retrievals)
doc_name, doc_id = doc
logging.info("Retrieval chunks are", retrieval_chunks)
classification = await classify_documents(doc_name, document_id =doc_id, content=concatenated_retrievals)
logging.info("Classification is", str(classification))
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
password=config.graph_database_password)
rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
neo4j_graph_db.query(rs, classification)
neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name,
if document_memory_types == ['PUBLIC']:
await create_public_memory(user_id=user_id, labels=['sr'], topic="PublicMemory")
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic="PublicMemory")
print(ids)
else:
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic="SemanticMemory")
print(ids)
for id in ids:
print(id.get('memoryId'))
if document_memory_types == ['PUBLIC']:
rs = neo4j_graph_db.create_document_node_cypher(classification, user_id, public_memory_id=id.get('memoryId'))
else:
rs = neo4j_graph_db.create_document_node_cypher(classification, user_id, memory_type='SemanticMemory')
logging.info("Cypher query is", rs)
neo4j_graph_db.query(rs)
logging.info("WE GOT HERE")
if memory_details[0][1] == "PUBLIC":
neo4j_graph_db.update_document_node_with_db_ids( vectordb_namespace=memory_details[0][0],
document_id=doc_id)
logging.info("hERE IS THE OUT2323", doc_id)
await update_entity_graph_summary(session, DocsModel, doc_id, True)
else:
neo4j_graph_db.update_document_node_with_db_ids( vectordb_namespace=memory_details[0][0],
document_id=doc_id, user_id=user_id)
# await update_entity_graph_summary(session, DocsModel, doc_id, True)
except Exception as e:
return e
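For example, a hedged sketch of driving this loader for non-public documents, assuming an open AsyncSession and that the 'SEMANTIC' category mirrors the commented-out variant above:

async def ingest_semantic_documents(session, user_id: str = "user"):
    # Sketch: graph ingestion for documents tagged with a non-public memory category
    await add_documents_to_graph_db(
        session,
        user_id=user_id,
        document_memory_types=["SEMANTIC"],  # the default is ['PUBLIC']
    )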
@ -230,11 +328,13 @@ def generate_graph(input) -> ResponseString:
response_model=ResponseString,
)
return out
async def user_context_enrichment(session, user_id:str, query:str)->str:
async def user_context_enrichment(session, user_id:str, query:str, generative_response:bool=False, memory_type:str=None)->str:
"""
Asynchronously enriches the user context by integrating various memory systems and document classifications.
This function uses cognitive architecture to access and manipulate different memory systems (semantic, episodic, and procedural) associated with a user. It fetches memory details from a Neo4j graph database, classifies document categories based on the user's query, and retrieves document IDs for relevant categories. The function also dynamically manages memory attributes and methods, extending the context with document store information to enrich the user's query response.
This function uses cognitive architecture to access and manipulate different memory systems (semantic, episodic, and procedural) associated with a user.
It fetches memory details from a Neo4j graph database, classifies document categories based on the user's query, and retrieves document IDs for relevant categories.
The function also dynamically manages memory attributes and methods, extending the context with document store information to enrich the user's query response.
Parameters:
- session (AsyncSession): The database session for executing queries.
@ -265,31 +365,40 @@ async def user_context_enrichment(session, user_id:str, query:str)->str:
semantic_mem = neo4j_graph_db.retrieve_semantic_memory(user_id=user_id)
episodic_mem = neo4j_graph_db.retrieve_episodic_memory(user_id=user_id)
context = f""" You are a memory system that uses cognitive architecture to enrich the user context.
You have access to the following information:
EPISODIC MEMORY: {episodic_mem}
SEMANTIC MEMORY: {semantic_mem}
PROCEDURAL MEMORY: NULL
The original user query: {query}
"""
# public_mem = neo4j_graph_db.retrieve_public_memory(user_id=user_id)
logging.info("Context from graphdb is %s", context)
document_categories_query = await neo4j_graph_db.get_document_categories(user_id=user_id)
result = neo4j_graph_db.query(document_categories_query)
categories = [record["category"] for record in result]
logging.info('Possible document categories are', str(categories))
relevant_categories = await classify_call( query= query, context = context, document_types=str(categories))
logging.info("Relevant categories after the classifier are %s", relevant_categories)
from cognitive_architecture.shared.language_processing import translate_text, detect_language
get_doc_ids = await neo4j_graph_db.get_document_ids(user_id, relevant_categories)
postgres_id = neo4j_graph_db.query(get_doc_ids)
if detect_language(query) != "en":
query = translate_text(query, "sr", "en")
logging.info("Translated query is", query)
summaries = await neo4j_graph_db.get_memory_linked_document_summaries(user_id=user_id, memory_type=memory_type)
# logging.info("Result is %s", result)
# logging.info("Context from graphdb is %s", context)
# result = neo4j_graph_db.query(document_categories_query)
# summaries = [record.get("summary") for record in result]
# logging.info('Possible document categories are', str(result))
# logging.info('Possible document categories are', str(categories))
relevant_summary = await classify_call( query= query, document_summaries=str(summaries))
# logging.info("Relevant categories after the classifier are %s", relevant_categories)
postgres_id = await neo4j_graph_db.get_memory_linked_document_ids(user_id, summary = relevant_summary, memory_type=memory_type)
# postgres_id = neo4j_graph_db.query(get_doc_ids)
logging.info("Postgres ids are %s", postgres_id)
namespace_id = await get_memory_name_by_doc_id(session, postgres_id[0]["d_id"])
namespace_id = await get_memory_name_by_doc_id(session, postgres_id[0])
logging.info("Namespace ids are %s", namespace_id)
params= {"doc_id":postgres_id[0]}
namespace_id = namespace_id[0]
namespace_class = namespace_id + "_class"
if memory_type =='PublicMemory':
user_id = 'system_user'
memory = await Memory.create_memory(user_id, session, namespace=namespace_id, job_id="23232",
memory_label=namespace_id)
@ -313,15 +422,151 @@ async def user_context_enrichment(session, user_id:str, query:str)->str:
print(f"No attribute named in memory.")
print("Available memory classes:", await memory.list_memory_classes())
result = await memory.dynamic_method_call(dynamic_memory_class, 'fetch_memories',
observation=query)
context_extension = "Document store information that can help and enrich the anwer is: " + str(result)
entire_context = context + context_extension
final_result = generate_graph(entire_context)
logging.info("Final result is %s", final_result)
results = await memory.dynamic_method_call(dynamic_memory_class, 'fetch_memories',
observation=query, params=postgres_id[0], search_type="summary_filter_by_object_name")
logging.info("Result is", str(results))
return final_result.response
search_context = ""
for result in results['data']['Get'][namespace_id]:
# Assuming 'result' is a dictionary and has keys like 'source', 'text'
source = result['source']
text = result['text']
search_context += f"Document source: {source}, Document text: {text} \n"
context = f""" You are a memory system that uses cognitive architecture to enrich the
LLM context and provide better query response.
You have access to the following information:
EPISODIC MEMORY: {episodic_mem}
SEMANTIC MEMORY: {semantic_mem}
PROCEDURAL MEMORY: NULL
SEARCH CONTEXT: The following documents provided with sources they were
extracted from could be used to provide an answer {search_context}
The original user query: {query}
"""
if generative_response is not True:
return context
else:
generative_result = generate_graph(context)
translation_to_srb = translate_text(generative_result.response, "en", "sr")
return translation_to_srb
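An illustrative wrapper around this flow, assuming an open AsyncSession; the user id, memory type, and flags are placeholders:

async def answer_public_query(session, query: str) -> str:
    # Sketch: enrich the query against public memory and return the generated answer
    return await user_context_enrichment(
        session,
        user_id="system_user",
        query=query,
        generative_response=True,
        memory_type="PublicMemory",
    )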
async def create_public_memory(user_id: str=None, labels:list=None, topic:str=None) -> Optional[int]:
"""
Create a public memory node associated with a user in a Neo4j graph database.
If Public Memory exists, it will return the id of the memory.
This is intended as a standalone node that can be attached to any user.
It is not attached to any user by default.
Args:
user_id (str): The unique identifier for the user.
session (AsyncSession): An asynchronous session for database operations.
Returns:
Optional[int]: The ID of the created public memory node or None if an error occurs.
:param labels: Label for the memory, to help filter for different countries
:param topic: Topic for the memory, to help provide a name
"""
# Validate input parameters
if not labels:
labels = ['sr'] # Labels for the memory node
if not topic:
topic = "PublicMemory"
try:
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url,
username=config.graph_database_username,
password=config.graph_database_password)
# Assuming the topic for public memory is predefined, e.g., "PublicMemory"
# Create the memory node
memory_id = await neo4j_graph_db.create_memory_node(labels=labels, topic=topic)
return memory_id
except Neo4jError as e:
logging.error(f"Error creating public memory node: {e}")
return None
async def attach_user_to_memory(user_id: str=None, labels:list=None, topic:str=None) -> Optional[int]:
"""
Link user to public memory
Args:
user_id (str): The unique identifier for the user.
topic (str): Memory name
Returns:
Optional[int]: The ID of the created public memory node or None if an error occurs.
:param labels: Label for the memory, to help filter for different countries
:param topic: Topic for the memory, to help provide a name
"""
# Validate input parameters
if not user_id:
raise ValueError("User ID is required.")
if not labels:
labels = ['sr'] # Labels for the memory node
if not topic:
topic = "PublicMemory"
try:
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url,
username=config.graph_database_username,
password=config.graph_database_password)
# Assuming the topic for public memory is predefined, e.g., "PublicMemory"
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic=topic)
for id in ids:
linked_memory = neo4j_graph_db.link_public_memory_to_user(memory_id=id.get('memoryId'), user_id=user_id)
return 1
except Neo4jError as e:
logging.error(f"Error creating public memory node: {e}")
return None
async def unlink_user_from_memory(user_id: str=None, labels:list=None, topic:str=None) -> Optional[int]:
"""
Unlink user from memory
Args:
user_id (str): The unique identifier for the user.
topic (str): Memory name
Returns:
Optional[int]: The ID of the created public memory node or None if an error occurs.
:param labels: Label for the memory, to help filter for different countries
:param topic: Topic for the memory, to help provide a name
"""
# Validate input parameters
if not user_id:
raise ValueError("User ID is required.")
if not labels:
labels = ['sr'] # Labels for the memory node
if not topic:
topic = "PublicMemory"
try:
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url,
username=config.graph_database_username,
password=config.graph_database_password)
# Assuming the topic for public memory is predefined, e.g., "PublicMemory"
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic=topic)
for id in ids:
linked_memory = neo4j_graph_db.unlink_memory_from_user(memory_id=id.get('memoryId'), user_id=user_id)
return 1
except Neo4jError as e:
logging.error(f"Error creating public memory node: {e}")
return None
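Taken together, a hedged sketch of the public-memory lifecycle these helpers enable; the user id and labels are placeholders:

async def public_memory_lifecycle(user_id: str = "user"):
    # Sketch: create the shared node once, attach a user to it, later detach that user
    memory_id = await create_public_memory(labels=["sr"], topic="PublicMemory")
    if memory_id is not None:
        await attach_user_to_memory(user_id=user_id, topic="PublicMemory")
        # ... serve queries via user_context_enrichment(..., memory_type="PublicMemory") ...
        await unlink_user_from_memory(user_id=user_id, topic="PublicMemory")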
@ -329,7 +574,120 @@ async def main():
user_id = "user"
async with session_scope(AsyncSessionLocal()) as session:
await update_entity(session, DocsModel, "8cd9a022-5a7a-4af5-815a-f988415536ae", True)
# await update_entity(session, DocsModel, "8cd9a022-5a7a-4af5-815a-f988415536ae", True)
# output = await get_unsumarized_vector_db_namespace(session, user_id)
class GraphQLQuery(BaseModel):
query: str
# def cypher_statement_correcting( input: str) -> str:
# out = aclient.chat.completions.create(
# model=config.model,
# temperature=0,
# max_tokens=2000,
# messages=[
# {
# "role": "user",
# "content": f"""Check the cypher query for syntax issues, and fix any if found and return it as is: {input}. """,
#
# },
# {"role": "system", "content": """You are a top-tier algorithm
# designed for checking cypher queries for neo4j graph databases. You have to return input provided to you as is."""}
# ],
# response_model=GraphQLQuery,
# )
# return out
#
#
# query= """WITH person1_4f21b68c73e24d0497e1010eb747b892, location2_dc0c68a9651142d38b6e117bfdc5c227, object3_4c7ba47babd24be1b35c30c42c87a3e9, product4_c984d5f9695f48ee9a43f58f57cc6740, location5_5e43f4c45b3c44ea897c12220db4c051, object6_5cdb87ad488c450c9dbce07b7daf3d8d, information7_f756e3f3720c4fe5aeb01287badaf088, event8_da6334e744454264900296319e14b532, action9_48e45419604e4d66b3e718ee1d6c095f, action10_f48acb1db4da4934afbe17363e9e63a4, user , semantic, episodic, buffer
# CREATE (person1_4f21b68c73e24d0497e1010eb747b892)-[:EXPERIENCED]->(event8_da6334e744454264900296319e14b532)
# CREATE (person1_4f21b68c73e24d0497e1010eb747b892)-[:HAS]->(object3_4c7ba47babd24be1b35c30c42c87a3e9)
# CREATE (object3_4c7ba47babd24be1b35c30c42c87a3e9)-[:INCLUDES]->(product4_c984d5f9695f48ee9a43f58f57cc6740)
# CREATE (product4_c984d5f9695f48ee9a43f58f57cc6740)-[:TO_BE_PURCHASED_AT]->(location5_5e43f4c45b3c44ea897c12220db4c051)
# CREATE (person1_4f21b68c73e24d0497e1010eb747b892)-[:INTENDS_TO_PERFORM]->(action9_48e45419604e4d66b3e718ee1d6c095f)
# CREATE (object6_5cdb87ad488c450c9dbce07b7daf3d8d)-[:A_CLASSICAL_BOOK_TO_BE_SUMMARIZED]->(information7_f756e3f3720c4fe5aeb01287badaf088)
# CREATE (person1_4f21b68c73e24d0497e1010eb747b892)-[:NEEDS_TO_COMPLETE]->(action10_f48acb1db4da4934afbe17363e9e63a4)
# WITH person1_4f21b68c73e24d0497e1010eb747b892, location2_dc0c68a9651142d38b6e117bfdc5c227, object3_4c7ba47babd24be1b35c30c42c87a3e9, product4_c984d5f9695f48ee9a43f58f57cc6740, location5_5e43f4c45b3c44ea897c12220db4c051, object6_5cdb87ad488c450c9dbce07b7daf3d8d, information7_f756e3f3720c4fe5aeb01287badaf088, event8_da6334e744454264900296319e14b532, action9_48e45419604e4d66b3e718ee1d6c095f, action10_f48acb1db4da4934afbe17363e9e63a4, user, semantic, episodic, buffer
# CREATE (episodic)-[:HAS_EVENT]->(person1_4f21b68c73e24d0497e1010eb747b892)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(person1_4f21b68c73e24d0497e1010eb747b892)
# CREATE (episodic)-[:HAS_EVENT]->(location2_dc0c68a9651142d38b6e117bfdc5c227)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(location2_dc0c68a9651142d38b6e117bfdc5c227)
# CREATE (episodic)-[:HAS_EVENT]->(object3_4c7ba47babd24be1b35c30c42c87a3e9)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(object3_4c7ba47babd24be1b35c30c42c87a3e9)
# CREATE (episodic)-[:HAS_EVENT]->(product4_c984d5f9695f48ee9a43f58f57cc6740)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(product4_c984d5f9695f48ee9a43f58f57cc6740)
# CREATE (episodic)-[:HAS_EVENT]->(location5_5e43f4c45b3c44ea897c12220db4c051)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(location5_5e43f4c45b3c44ea897c12220db4c051)
# CREATE (episodic)-[:HAS_EVENT]->(object6_5cdb87ad488c450c9dbce07b7daf3d8d)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(object6_5cdb87ad488c450c9dbce07b7daf3d8d)
# CREATE (episodic)-[:HAS_EVENT]->(information7_f756e3f3720c4fe5aeb01287badaf088)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(information7_f756e3f3720c4fe5aeb01287badaf088)
# CREATE (episodic)-[:HAS_EVENT]->(event8_da6334e744454264900296319e14b532)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(event8_da6334e744454264900296319e14b532)
# CREATE (episodic)-[:HAS_EVENT]->(action9_48e45419604e4d66b3e718ee1d6c095f)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action9_48e45419604e4d66b3e718ee1d6c095f)
# CREATE (episodic)-[:HAS_EVENT]->(action10_f48acb1db4da4934afbe17363e9e63a4)
# CREATE (buffer)-[:CURRENTLY_HOLDING]->(action10_f48acb1db4da4934afbe17363e9e63a4)"""
#
# out = cypher_statement_correcting(query)
# print(out)
#
# out = await user_query_to_graph_db(session, user_id, "I walked in the forest yesterday and added to my list I need to buy some milk in the store and get a summary from a classical book i read yesterday")
# print(out)
# load_doc_to_graph = await add_documents_to_graph_db(session, user_id)
# print(load_doc_to_graph)
# user_id = 'user'
# loader_settings = {
# "format": "PDF",
# "source": "DEVICE",
# "path": [".data"]
# }
# await load_documents_to_vectorstore(session, user_id, loader_settings=loader_settings)
# await create_public_memory(user_id=user_id, labels=['sr'], topic="PublicMemory")
# await add_documents_to_graph_db(session, user_id)
#
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
password=config.graph_database_password)
# await attach_user_to_memory(user_id=user_id, labels=['sr'], topic="PublicMemory")
# return_ = await user_context_enrichment(user_id=user_id, query="Koja je minimalna visina ograde na balkonu na stambenom objektu", session=session)
# print(return_)
# document_summary = {
# 'DocumentCategory': 'Science',
# 'Title': 'The Future of AI',
# 'Summary': 'An insightful article about the advancements in AI.',
# 'd_id': 'doc123'
# }
#
# # Example user ID
# user_id = 'user'
#
# # value = await neo4j_graph_db.create_memory_node(labels=['sr'])
# # print(value)
# # neo4j_graph_db.close()
#
# await add_documents_to_graph_db(session, user_id)
# neo4j_graph_db.link_public_memory_to_user(memory_id = 17,user_id=user_id)
#
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic="Document")
print(ids)
for id in ids:
print(id.get('memoryId'))
neo4j_graph_db.delete_memory_node(memory_id = id.get('memoryId'), topic="Document")
#
# neo4j_graph_db.delete_memory_node(memory_id=16, topic="PublicSerbianArchitecture")
# neo4j_graph_db.unlink_memory_from_user(memory_id = 17,user_id=user_id)
# cypher_query_public = neo4j_graph_db.create_document_node_cypher(document_summary, user_id, memory_type="PUBLIC")
# neo4j_graph_db.query(cypher_query_public)
# link_memory_to_user(user_id, session)
# neo4j_graph_db.create_memory_node(labels=['sr'])
# out = await get_vectordb_namespace(session, user_id)
# params = {
# "version": "1.0",

12
poetry.lock generated
View file

@ -2121,6 +2121,16 @@ typing_extensions = "*"
[package.extras]
aws = ["boto3"]
[[package]]
name = "iso639"
version = "0.1.4"
description = "ISO639-2 support for Python."
optional = false
python-versions = "*"
files = [
{file = "iso639-0.1.4.tar.gz", hash = "sha256:88b70cf6c64ee9c2c2972292818c8beb32db9ea6f4de1f8471a9b081a3d92e98"},
]
[[package]]
name = "itsdangerous"
version = "2.1.2"
@ -6782,4 +6792,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "b32ccb9319a4411b11b4aa2fde21b354ea93488e4bd6057c62847baf47cf7db6"
content-hash = "c3402714a133f8583fcbf06955f155d42169ffd54874d565617b2c4cdf58a227"

View file

@ -59,6 +59,8 @@ graphviz = "^0.20.1"
greenlet = "^3.0.1"
neo4j = "^5.14.1"
grpcio = "^1.60.0"
langdetect = "^1.0.9"
iso639 = "^0.1.4"