Add all functions needed for the architecture and for regular user flows, refactor and reformat, add boilerplate for the job-loading logic, and extend search functionality.
This commit is contained in:
parent 118613c484
commit 2928f51343
37 changed files with 1747 additions and 215 deletions
BIN .data/Pravilnik-o-energetskoj-efikasnosti-zgrada.pdf Normal file (binary file not shown)
BIN .data/Pravilnik-o-postupku-sprovodjenja-objedinjene-1.pdf Normal file (binary file not shown)
Other binary files changed in this commit are not shown.
@ -12,3 +12,7 @@ COG_ARCH_DIR = cognitive_architecture
GRAPH_DB_URL =
GRAPH_DB_PW =
GRAPH_DB_USER =
AWS_ACCESS_KEY_ID =
AWS_SECRET_ACCESS_KEY =
QDRANT_API_KEY
QDRANT_API_URL
@ -24,6 +24,7 @@ RUN apt-get update -q && \
    curl \
    zip \
    jq \
    libgl1-mesa-glx \
    netcat-traditional && \
    pip install poetry && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
153 api.py
@ -95,6 +95,36 @@ async def add_memory(
            content={"response": {"error": str(e)}}, status_code=503
        )

@app.post("/add-architecture-public-memory", response_model=dict)
async def add_memory(
    payload: Payload,
    # files: List[UploadFile] = File(...),
):
    try:
        logging.info(" Adding to Memory ")
        decoded_payload = payload.payload
        async with session_scope(session=AsyncSessionLocal()) as session:
            from main import load_documents_to_vectorstore
            if 'content' in decoded_payload and decoded_payload['content'] is not None:
                content = decoded_payload['content']
            else:
                content = None

            user_id = 'system_user'
            loader_settings = {
                "format": "PDF",
                "source": "DEVICE",
                "path": [".data"]
            }

            output = await load_documents_to_vectorstore(session, user_id=user_id, content=content, loader_settings=loader_settings)
            return JSONResponse(content={"response": output}, status_code=200)

    except Exception as e:
        return JSONResponse(
            content={"response": {"error": str(e)}}, status_code=503
        )

@app.post("/user-query-to-graph")
async def user_query_to_graph(payload: Payload):
    try:
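For quick reference, a minimal sketch of calling the new endpoint from a client, assuming the API from this commit runs locally on port 8000, the PDFs are already present in the .data directory, and the wrapping "payload" key matches the Payload model used by the other endpoints:

import requests

body = {"payload": {"content": None}}  # content is optional; documents are loaded from .data
resp = requests.post("http://localhost:8000/add-architecture-public-memory", json=body)
print(resp.status_code, resp.json())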
@ -120,22 +150,26 @@ async def document_to_graph_db(payload: Payload):
            settings_for_loader = decoded_payload['settings']
        else:
            settings_for_loader = None
        if 'memory_type' in decoded_payload and decoded_payload['memory_type'] is not None:
            memory_type = decoded_payload['memory_type']
        else:
            memory_type = None
        async with session_scope(session=AsyncSessionLocal()) as session:
            result = await add_documents_to_graph_db(session =session, user_id = decoded_payload['user_id'], loader_settings =settings_for_loader)
            result = await add_documents_to_graph_db(session =session, user_id = decoded_payload['user_id'], document_memory_types =memory_type)
            return result

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/user-query-processor")
async def user_query_processor(payload: Payload):
@app.post("/cognitive-context-enrichment")
async def cognitive_context_enrichment(payload: Payload):
    try:
        decoded_payload = payload.payload

        # Execute the query - replace this with the actual execution method
        async with session_scope(session=AsyncSessionLocal()) as session:
            # Assuming you have a method in Neo4jGraphDB to execute the query
            result = await user_context_enrichment(session, decoded_payload['user_id'], decoded_payload['query'])
            result = await user_context_enrichment(session, user_id = decoded_payload['user_id'], query= decoded_payload['query'], generative_response=decoded_payload['generative_response'], memory_type= decoded_payload['memory_type'])
            return JSONResponse(content={"response": result}, status_code=200)

    except Exception as e:
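A hedged sketch of a client call to the renamed /cognitive-context-enrichment endpoint; the field names mirror the keys read from decoded_payload above, while the concrete values are placeholders:

import requests

body = {
    "payload": {
        "user_id": "user_123",
        "query": "What are the energy efficiency requirements for buildings?",
        "generative_response": True,
        "memory_type": "PublicMemory",
    }
}
resp = requests.post("http://localhost:8000/cognitive-context-enrichment", json=body)
print(resp.json())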
@ -163,12 +197,121 @@ async def user_query_classfier(payload: Payload):
async def drop_db(payload: Payload):
    try:
        decoded_payload = payload.payload
        return JSONResponse(content={"response": "dropped"}, status_code=200)

        if decoded_payload['operation'] == 'drop':

            if os.environ.get('AWS_ENV') == 'dev':
                host = os.environ.get('POSTGRES_HOST')
                username = os.environ.get('POSTGRES_USER')
                password = os.environ.get('POSTGRES_PASSWORD')
                database_name = os.environ.get('POSTGRES_DB')
            else:
                pass

            from cognitive_architecture.database.postgres import drop_database, create_admin_engine

            engine = create_admin_engine(username, password, host, database_name)
            drop_database(engine)
            return JSONResponse(content={"response": "DB dropped"}, status_code=200)
        else:

            if os.environ.get('AWS_ENV') == 'dev':
                host = os.environ.get('POSTGRES_HOST')
                username = os.environ.get('POSTGRES_USER')
                password = os.environ.get('POSTGRES_PASSWORD')
                database_name = os.environ.get('POSTGRES_DB')
            else:
                pass

            from cognitive_architecture.database.postgres import create_database, create_admin_engine

            engine = create_admin_engine(username, password, host, database_name)
            create_database(engine)
            return JSONResponse(content={"response": " DB created"}, status_code=200)

    except Exception as e:
        return HTTPException(status_code=500, detail=str(e))


@app.post("/create-public-memory")
async def create_public_memory(payload: Payload):
    try:
        decoded_payload = payload.payload

        if 'user_id' in decoded_payload and decoded_payload['user_id'] is not None:
            user_id = decoded_payload['user_id']
        else:
            user_id = None

        if 'labels' in decoded_payload and decoded_payload['labels'] is not None:
            labels = decoded_payload['labels']
        else:
            labels = None

        if 'topic' in decoded_payload and decoded_payload['topic'] is not None:
            topic = decoded_payload['topic']
        else:
            topic = None

        # Execute the query - replace this with the actual execution method
        # async with session_scope(session=AsyncSessionLocal()) as session:
        #     from main import create_public_memory
        # Assuming you have a method in Neo4jGraphDB to execute the query
        result = await create_public_memory(user_id=user_id, labels=labels, topic=topic)
        return JSONResponse(content={"response": result}, status_code=200)

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/attach-user-to-public-memory")
async def attach_user_to_public_memory(payload: Payload):
    try:
        decoded_payload = payload.payload

        if 'topic' in decoded_payload and decoded_payload['topic'] is not None:
            topic = decoded_payload['topic']
        else:
            topic = None
        if 'labels' in decoded_payload and decoded_payload['labels'] is not None:
            labels = decoded_payload['labels']
        else:
            labels = ['sr']

        # Execute the query - replace this with the actual execution method
        async with session_scope(session=AsyncSessionLocal()) as session:
            from main import attach_user_to_memory, create_public_memory
            # Assuming you have a method in Neo4jGraphDB to execute the query
            await create_public_memory(user_id=decoded_payload['user_id'], topic=topic, labels=labels)
            result = await attach_user_to_memory( user_id = decoded_payload['user_id'], topic=topic, labels=labels)
            return JSONResponse(content={"response": result}, status_code=200)

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/unlink-user-from-public-memory")
async def unlink_user_from_public_memory(payload: Payload):
    try:
        decoded_payload = payload.payload

        if 'topic' in decoded_payload and decoded_payload['topic'] is not None:
            topic = decoded_payload['topic']
        else:
            topic = None

        # Execute the query - replace this with the actual execution method
        async with session_scope(session=AsyncSessionLocal()) as session:
            from main import unlink_user_from_memory
            # Assuming you have a method in Neo4jGraphDB to execute the query
            result = await unlink_user_from_memory( user_id = decoded_payload['user_id'], topic=topic)
            return JSONResponse(content={"response": result}, status_code=200)

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

def start_api_server(host: str = "0.0.0.0", port: int = 8000):
    """
    Start the API server using uvicorn.
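A minimal sketch of the public-memory flow added above: create the shared memory node, then attach a user to it. The endpoint paths and payload keys come from the handlers above; the topic and user id are illustrative only:

import requests

base = "http://localhost:8000"
requests.post(f"{base}/create-public-memory",
              json={"payload": {"user_id": None, "labels": ["sr"], "topic": "SerbianArchitecture"}})
requests.post(f"{base}/attach-user-to-public-memory",
              json={"payload": {"user_id": "user_123", "labels": ["sr"], "topic": "SerbianArchitecture"}})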
@ -73,11 +73,11 @@ def classify_retrieval():


# classify documents according to type of document
async def classify_call(query, context, document_types):
async def classify_call(query, document_summaries):

    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. Determine what document types are relevant : {query}, Context: {context}, Book_types:{document_types}"""
        """You are a classifier. Determine what document are relevant for the given query: {query}, Document summaries:{document_summaries}"""
    )
    json_structure = [{
        "name": "classifier",

@ -85,20 +85,20 @@ async def classify_call(query, context, document_types):
        "parameters": {
            "type": "object",
            "properties": {
                "DocumentCategory": {
                "DocumentSummary": {
                    "type": "string",
                    "description": "The classification of documents in groups such as legal, medical, etc."
                    "description": "The summary of the document and the topic it deals with."
                }

            }, "required": ["DocumentCategory"] }
            }, "required": ["DocumentSummary"] }
    }]
    chain_filter = prompt_classify | llm.bind(function_call={"name": "classifier"}, functions=json_structure)
    classifier_output = await chain_filter.ainvoke({"query": query, "context": context, "document_types": document_types})
    classifier_output = await chain_filter.ainvoke({"query": query, "document_summaries": document_summaries})
    arguments_str = classifier_output.additional_kwargs['function_call']['arguments']
    print("This is the arguments string", arguments_str)
    arguments_dict = json.loads(arguments_str)
    classfier_value = arguments_dict.get('DocumentCategory', None)
    classfier_value = arguments_dict.get('DocumentSummary', None)

    print("This is the classifier value", classfier_value)
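A short usage sketch of the reworked classifier, assuming it is awaited from async code and that document_summaries is a list of summary strings previously gathered from the graph:

summaries = ["Rulebook on energy efficiency of buildings", "Rulebook on the unified procedure"]
relevant = await classify_call(
    query="Which document covers building insulation requirements?",
    document_summaries=summaries,
)
print(relevant)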
@ -71,6 +71,19 @@ def create_database(username, password, host, db_name):
    engine.dispose()


def drop_database(username, password, host, db_name):
    engine = create_admin_engine(username, password, host)
    connection = engine.raw_connection()
    connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()
    cursor.execute(f"DROP DATABASE IF EXISTS {db_name}")
    cursor.close()
    connection.close()
    engine.dispose()
    print(f"Database {db_name} dropped successfully.")


def create_tables(engine):
    Base.metadata.create_all(bind=engine)
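A hedged usage sketch of the new drop_database helper, following the signature defined in this file and reading connection details from the same POSTGRES_* environment variables used in api.py:

import os
from cognitive_architecture.database.postgres import drop_database

drop_database(
    username=os.environ.get("POSTGRES_USER"),
    password=os.environ.get("POSTGRES_PASSWORD"),
    host=os.environ.get("POSTGRES_HOST"),
    db_name=os.environ.get("POSTGRES_DB"),
)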
@ -5,6 +5,9 @@
import logging
import os

from neo4j import AsyncSession
from neo4j.exceptions import Neo4jError

print(os.getcwd())

import networkx as nx
@ -25,8 +28,10 @@ from abc import ABC, abstractmethod
# Allows the return of Pydantic model rather than raw JSON

from pydantic import BaseModel, Field
from typing import List
from ...utils import format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, create_node_variable_mapping
from typing import List, Dict, Optional
from ...utils import format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, \
    create_node_variable_mapping, get_unsumarized_vector_db_namespace

DEFAULT_PRESET = "promethai_chat"
preset_options = [DEFAULT_PRESET]
PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".")
@ -113,7 +118,8 @@ class KnowledgeGraph(BaseModel):
    nodes: List[Node] = Field(..., default_factory=list)
    edges: List[Edge] = Field(..., default_factory=list)


class GraphQLQuery(BaseModel):
    query: str
#

def generate_graph(input) -> KnowledgeGraph:
@ -185,13 +191,25 @@ class AbstractGraphDB(ABC):

class Neo4jGraphDB(AbstractGraphDB):
    def __init__(self, url, username, password):
        self.graph = Neo4jGraph(url=url, username=username, password=password)
        # self.graph = Neo4jGraph(url=url, username=username, password=password)
        from neo4j import GraphDatabase
        self.driver = GraphDatabase.driver(url, auth=(username, password))
        self.openai_key = config.openai_key


    def close(self):
        # Method to close the Neo4j driver instance
        self.driver.close()

    def query(self, query, params=None):
        return self.graph.query(query, params)
        try:
            with self.driver.session() as session:
                result = session.run(query, params).data()
                return result
        except Exception as e:
            logging.error(f"An error occurred while executing the query: {e}")
            raise e
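A brief sketch of the driver-backed query method above; the bolt URL and credentials are placeholders:

graph = Neo4jGraphDB(url="bolt://localhost:7687", username="neo4j", password="secret")
rows = graph.query(
    "MATCH (user:User {userId: $user_id}) RETURN user",
    params={"user_id": "user_123"},
)
graph.close()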
@ -209,7 +227,7 @@ class Neo4jGraphDB(AbstractGraphDB):
        return user_memory_cypher

    def user_query_to_edges_and_nodes(self, input: str) ->KnowledgeGraph:
        return openai.ChatCompletion.create(
        return aclient.chat.completions.create(
            model=config.model,
            messages=[
                {
@ -255,6 +273,22 @@ class Neo4jGraphDB(AbstractGraphDB):
            response_model=KnowledgeGraph,
        )


    def cypher_statement_correcting(self, input: str) ->str:
        return aclient.chat.completions.create(
            model=config.model,
            messages=[
                {
                    "role": "user",
                    "content": f"""Check the cypher query for syntax issues, and fix any if found and return it as is: {input}. """,

                },
                {"role": "system", "content": """You are a top-tier algorithm
                designed for checking cypher queries for neo4j graph databases. You have to return input provided to you as is"""}
            ],
            response_model=GraphQLQuery,
        )

    def generate_create_statements_for_nodes_with_uuid(self, nodes, unique_mapping, base_node_mapping):
        create_statements = []
        for node in nodes:
@ -324,6 +358,10 @@ class Neo4jGraphDB(AbstractGraphDB):
        # # Combine all statements
        cypher_statements = [self.create_base_cognitive_architecture(user_id)] + create_nodes_statements + create_edges_statements + memory_type_statements_with_uuid_and_time_context
        cypher_statements_joined = "\n".join(cypher_statements)
        logging.info("User Cypher Query raw: %s", cypher_statements_joined)
        # corrected_cypher_statements = self.cypher_statement_correcting(input = cypher_statements_joined)
        # logging.info("User Cypher Query: %s", corrected_cypher_statements.query)
        # return corrected_cypher_statements.query
        return cypher_statements_joined
@ -334,7 +372,7 @@ class Neo4jGraphDB(AbstractGraphDB):
    def delete_all_user_memories(self, user_id):
        try:
            # Check if the user exists
            user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
            user_exists = self.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
            if not user_exists:
                return f"No user found with ID: {user_id}"
@ -348,7 +386,7 @@ class Neo4jGraphDB(AbstractGraphDB):
            MATCH (user)-[:HAS_BUFFER]->(buffer)
            DETACH DELETE semantic, episodic, buffer
            """
            self.graph.query(delete_query)
            self.query(delete_query)
            return f"All memories deleted for user ID: {user_id}"
        except Exception as e:
            return f"An error occurred: {str(e)}"
@ -356,7 +394,7 @@ class Neo4jGraphDB(AbstractGraphDB):
    def delete_specific_memory_type(self, user_id, memory_type):
        try:
            # Check if the user exists
            user_exists = self.graph.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
            user_exists = self.query(f"MATCH (user:User {{userId: '{user_id}'}}) RETURN user")
            if not user_exists:
                return f"No user found with ID: {user_id}"
@ -369,7 +407,7 @@ class Neo4jGraphDB(AbstractGraphDB):
            MATCH (user:User {{userId: '{user_id}'}})-[:HAS_{memory_type.upper()}]->(memory)
            DETACH DELETE memory
            """
            self.graph.query(delete_query)
            self.query(delete_query)
            return f"{memory_type} deleted for user ID: {user_id}"
        except Exception as e:
            return f"An error occurred: {str(e)}"
@ -396,6 +434,15 @@ class Neo4jGraphDB(AbstractGraphDB):
        RETURN item
        """
        return self.query(query, params={"user_id": user_id})


    def retrieve_public_memory(self, user_id: str):
        query = """
        MATCH (user:User {userId: $user_id})-[:HAS_PUBLIC_MEMORY]->(public:PublicMemory)
        MATCH (public)-[:HAS_DOCUMENT]->(document)
        RETURN document
        """
        return self.query(query, params={"user_id": user_id})

    def generate_graph_semantic_memory_document_summary(self, document_summary : str, unique_graphdb_mapping_values: dict, document_namespace: str):
        """ This function takes a document and generates a document summary in Semantic Memory"""
        create_statements = []
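For orientation, a minimal sketch of calling the new public-memory retrieval helper; connection values are placeholders and each record is a dict keyed by the RETURN alias:

graph = Neo4jGraphDB(url="bolt://localhost:7687", username="neo4j", password="secret")
for record in graph.retrieve_public_memory(user_id="user_123"):
    print(record["document"])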
@ -429,71 +476,142 @@ class Neo4jGraphDB(AbstractGraphDB):

        return create_statements

    async def get_document_categories(self, user_id: str):

    async def get_memory_linked_document_summaries(self, user_id: str, memory_type: str = "PublicMemory"):
        """
        Retrieve a list of categories for all documents associated with a given user.
        Retrieve a list of summaries for all documents associated with a given memory type for a user.

        This function executes a Cypher query in a Neo4j database to fetch the categories
        of all 'Document' nodes that are linked to the 'SemanticMemory' node of the specified user.

        Parameters:
        - session (AsyncSession): The database session for executing the query.
        - user_id (str): The unique identifier of the user.
        Args:
            user_id (str): The unique identifier of the user.
            memory_type (str): The type of memory node ('SemanticMemory' or 'PublicMemory').

        Returns:
        - List[str]: A list of document categories associated with the user.
            List[str]: A list of document categories associated with the memory type for the user.

        Raises:
        - Exception: If an error occurs during the database query execution.
            Exception: If an error occurs during the database query execution.
        """
        if memory_type == "PublicMemory":
            relationship = "HAS_PUBLIC_MEMORY"
        elif memory_type == "SemanticMemory":
            relationship = "HAS_SEMANTIC_MEMORY"
        try:
            query = f'''
            MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
            RETURN document.documentCategory AS category
            MATCH (user:User {{userId: '{user_id}'}})-[:{relationship}]->(memory:{memory_type})-[:HAS_DOCUMENT]->(document:Document)
            RETURN document.summary AS summary
            '''
            logging.info(f"Generated Cypher query: {query}")
            return query
            result = self.query(query)
            logging.info("Result: ", result)
            return [record.get("summary", "No summary available") for record in result]

        except Exception as e:
            logging.error(f"An error occurred while retrieving document categories: {str(e)}")
            logging.error(f"An error occurred while retrieving document summary: {str(e)}")
            return None

    async def get_document_ids(self, user_id: str, category: str):

    # async def get_document_categories(self, user_id: str):
    #     """
    #     Retrieve a list of categories for all documents associated with a given user.
    #
    #     This function executes a Cypher query in a Neo4j database to fetch the categories
    #     of all 'Document' nodes that are linked to the 'SemanticMemory' node of the specified user.
    #
    #     Parameters:
    #     - session (AsyncSession): The database session for executing the query.
    #     - user_id (str): The unique identifier of the user.
    #
    #     Returns:
    #     - List[str]: A list of document categories associated with the user.
    #
    #     Raises:
    #     - Exception: If an error occurs during the database query execution.
    #     """
    #     try:
    #         query = f'''
    #             MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
    #             RETURN document.documentCategory AS category
    #         '''
    #         logging.info(f"Generated Cypher query: {query}")
    #         return query
    #
    #     except Exception as e:
    #         logging.error(f"An error occurred while retrieving document categories: {str(e)}")
    #         return None

    # async def get_document_ids(self, user_id: str, category: str):
    #     """
    #     Retrieve a list of document IDs for a specific category associated with a given user.
    #
    #     This function executes a Cypher query in a Neo4j database to fetch the IDs
    #     of all 'Document' nodes in a specific category that are linked to the 'SemanticMemory' node of the specified user.
    #
    #     Parameters:
    #     - user_id (str): The unique identifier of the user.
    #     - category (str): The specific document category to filter by.
    #
    #     Returns:
    #     - List[str]: A list of document IDs in the specified category associated with the user.
    #
    #     Raises:
    #     - Exception: If an error occurs during the database query execution.
    #     """
    #     try:
    #         query = f'''
    #             MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{documentCategory: '{category}'}})
    #             RETURN document.d_id AS d_id
    #         '''
    #         logging.info(f"Generated Cypher query: {query}")
    #         return query
    #
    #     except Exception as e:
    #         logging.error(f"An error occurred while retrieving document IDs: {str(e)}")
    #         return None

    async def get_memory_linked_document_ids(self, user_id: str, summary: str, memory_type: str = "PUBLIC"):
        """
        Retrieve a list of document IDs for a specific category associated with a given user.
        Retrieve a list of document IDs for a specific category associated with a given memory type for a user.

        This function executes a Cypher query in a Neo4j database to fetch the IDs
        of all 'Document' nodes in a specific category that are linked to the 'SemanticMemory' node of the specified user.

        Parameters:
        - user_id (str): The unique identifier of the user.
        - category (str): The specific document category to filter by.
        Args:
            user_id (str): The unique identifier of the user.
            summary (str): The specific document summary to filter by.
            memory_type (str): The type of memory node ('SemanticMemory' or 'PublicMemory').

        Returns:
        - List[str]: A list of document IDs in the specified category associated with the user.
            List[str]: A list of document IDs in the specified category associated with the memory type for the user.

        Raises:
        - Exception: If an error occurs during the database query execution.
            Exception: If an error occurs during the database query execution.
        """

        if memory_type == "PublicMemory":
            relationship = "HAS_PUBLIC_MEMORY"
        elif memory_type == "SemanticMemory":
            relationship = "HAS_SEMANTIC_MEMORY"
        try:
            query = f'''
            MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{documentCategory: '{category}'}})
            MATCH (user:User {{userId: '{user_id}'}})-[:{relationship}]->(memory:{memory_type})-[:HAS_DOCUMENT]->(document:Document {{summary: '{summary}'}})
            RETURN document.d_id AS d_id
            '''
            logging.info(f"Generated Cypher query: {query}")
            return query

            result = self.query(query)
            return [record["d_id"] for record in result]
        except Exception as e:
            logging.error(f"An error occurred while retrieving document IDs: {str(e)}")
            return None

    def create_document_node_cypher(self, document_summary: dict, user_id: str) -> str:

    def create_document_node_cypher(self, document_summary: dict, user_id: str,
                                    memory_type: str = "PublicMemory",public_memory_id:str=None) -> str:
        """
        Generate a Cypher query to create a Document node linked to a SemanticMemory node for a user.
        Generate a Cypher query to create a Document node. If the memory type is 'Semantic',
        link it to a SemanticMemory node for a user. If the memory type is 'PublicMemory',
        only link the Document node to the PublicMemory node.

        Parameters:
        - document_summary (dict): A dictionary containing the document's category, title, and summary.
        - document_summary (dict): A dictionary containing the document's category, title, summary, and document ID.
        - user_id (str): The unique identifier for the user.
        - memory_type (str): The type of memory node to link ("Semantic" or "PublicMemory"). Default is "PublicMemory".

        Returns:
        - str: A Cypher query string with parameters.
@ -509,45 +627,79 @@ class Neo4jGraphDB(AbstractGraphDB):
            raise ValueError("The document_summary dictionary is missing required keys.")
        if not isinstance(user_id, str) or not user_id:
            raise ValueError("The user_id must be a non-empty string.")
        if memory_type not in ["SemanticMemory", "PublicMemory"]:
            raise ValueError("The memory_type must be either 'Semantic' or 'PublicMemory'.")

        # Escape single quotes in the document summary data (if not using parameters)
        # Escape single quotes in the document summary data
        title = document_summary['Title'].replace("'", "\\'")
        summary = document_summary['Summary'].replace("'", "\\'")
        document_category = document_summary['DocumentCategory'].replace("'", "\\'")
        d_id = document_summary['d_id'].replace("'", "\\'")

        # Generate the Cypher query using parameters
        memory_node_type = "SemanticMemory" if memory_type == "SemanticMemory" else "PublicMemory"

        user_memory_link = ''
        if memory_type == "SemanticMemory":
            user_memory_link = f'''
            // Ensure the User node exists
            MERGE (user:User {{ userId: '{user_id}' }})
            MERGE (memory:SemanticMemory {{ userId: '{user_id}' }})
            MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(memory)
            '''
        elif memory_type == "PublicMemory":
            logging.info(f"Public memory id: {public_memory_id}")
            user_memory_link = f'''
            // Merge with the existing PublicMemory node or create a new one if it does not exist
            MATCH (memory:PublicMemory {{ memoryId: {public_memory_id} }})
            '''

        cypher_query = f'''
        // Ensure the User node exists
        MERGE (user:User {{ userId: '{user_id}' }})

        // Ensure the SemanticMemory node exists and is connected to the User
        MERGE (semantic:SemanticMemory {{ userId: '{user_id}' }})
        MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)

        // Create the Document node with its properties
        CREATE (document:Document {{
            title: '{title}',
            summary: '{summary}',
            documentCategory: '{document_category}',
            d_id: '{d_id}'
        }})

        // Link the Document node to the SemanticMemory node
        CREATE (semantic)-[:HAS_DOCUMENT]->(document)
        '''
        {user_memory_link}

        // Create the Document node with its properties
        CREATE (document:Document {{
            title: '{title}',
            summary: '{summary}',
            documentCategory: '{document_category}',
            d_id: '{d_id}'
        }})

        // Link the Document node to the {memory_node_type} node
        MERGE (memory)-[:HAS_DOCUMENT]->(document)
        '''

        logging.info(f"Generated Cypher query: {cypher_query}")

        return cypher_query

    def update_document_node_with_namespace(self, user_id: str, vectordb_namespace: str, document_id: str):
        # Generate the Cypher query
        cypher_query = f'''
        MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{d_id: '{document_id}'}})
        SET document.vectordbNamespace = '{vectordb_namespace}'
        RETURN document
        '''
    def update_document_node_with_db_ids(self, vectordb_namespace: str, document_id: str, user_id: str = None):
        """
        Update the namespace of a Document node in the database. The document can be linked
        either to a SemanticMemory node (if a user ID is provided) or to a PublicMemory node.

        Parameters:
        - vectordb_namespace (str): The namespace to set for the vectordb.
        - document_id (str): The unique identifier of the document.
        - user_id (str, optional): The unique identifier for the user. Default is None.

        Returns:
        - str: A Cypher query string to perform the update.
        """

        if user_id:
            # Update for a document linked to a SemanticMemory node
            cypher_query = f'''
            MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document {{d_id: '{document_id}'}})
            SET document.vectordbNamespace = '{vectordb_namespace}'
            RETURN document
            '''
        else:
            # Update for a document linked to a PublicMemory node
            cypher_query = f'''
            MATCH (:PublicMemory)-[:HAS_DOCUMENT]->(document:Document {{d_id: '{document_id}'}})
            SET document.vectordbNamespace = '{vectordb_namespace}'
            RETURN document
            '''

        return cypher_query
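A hedged sketch of how the generated statement might be executed against an existing Neo4jGraphDB instance, using the keys required by the validation above (Title, Summary, DocumentCategory, d_id); the values and the public memory id are illustrative only:

doc = {
    "Title": "Pravilnik o energetskoj efikasnosti zgrada",
    "Summary": "Rulebook on the energy efficiency of buildings.",
    "DocumentCategory": "legal",
    "d_id": "doc-001",
}
cypher = graph.create_document_node_cypher(doc, user_id="user_123",
                                           memory_type="PublicMemory", public_memory_id=42)
graph.query(cypher)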
@ -581,6 +733,278 @@ class Neo4jGraphDB(AbstractGraphDB):
            logging.error(f"An error occurred while retrieving namespaces by document category: {str(e)}")
            return None

    async def create_memory_node(self, labels, topic=None):
        """
        Create or find a memory node of the specified type with labels and a description.

        Args:
            labels (List[str]): A list of labels for the node.
            topic (str, optional): The type of memory node to create or find. Defaults to "PublicMemory".

        Returns:
            int: The ID of the created or found memory node.

        Raises:
            ValueError: If input parameters are invalid.
            Neo4jError: If an error occurs during the database operation.
        """
        if topic is None:
            topic = "PublicMemory"

        # Prepare labels as a string
        label_list = ', '.join(f"'{label}'" for label in labels)

        # Cypher query to find or create the memory node with the given description and labels
        memory_cypher = f"""
        MERGE (memory:{topic} {{description: '{topic}', label: [{label_list}]}})
        SET memory.memoryId = ID(memory)
        RETURN id(memory) AS memoryId
        """

        try:
            result = self.query(memory_cypher)
            # Assuming the result is a list of records, where each record contains 'memoryId'
            memory_id = result[0]['memoryId'] if result else None
            self.close()
            return memory_id
        except Neo4jError as e:
            logging.error(f"Error creating or finding memory node: {e}")
            raise

    def link_user_to_public(self, user_id: str, public_property_value: str, public_property_name: str = 'name',
                            relationship_type: str = 'HAS_PUBLIC'):
        if not user_id or not public_property_value:
            raise ValueError("Valid User ID and Public property value are required for linking.")

        try:
            link_cypher = f"""
            MATCH (user:User {{userId: '{user_id}'}})
            MATCH (public:Public {{{public_property_name}: '{public_property_value}'}})
            MERGE (user)-[:{relationship_type}]->(public)
            """
            self.query(link_cypher)
        except Neo4jError as e:
            logging.error(f"Error linking Public node to user: {e}")
            raise

    def delete_memory_node(self, memory_id: int, topic: str) -> None:
        if not memory_id or not topic:
            raise ValueError("Memory ID and Topic are required for deletion.")

        try:
            delete_cypher = f"""
            MATCH ({topic.lower()}: {topic}) WHERE id({topic.lower()}) = {memory_id}
            DETACH DELETE {topic.lower()}
            """
            logging.info("Delete Cypher Query: %s", delete_cypher)
            self.query(delete_cypher)
        except Neo4jError as e:
            logging.error(f"Error deleting {topic} memory node: {e}")
            raise

    def unlink_memory_from_user(self, memory_id: int, user_id: str, topic: str='PublicMemory') -> None:
        """
        Unlink a memory node from a user node.

        Parameters:
        - memory_id (int): The internal ID of the memory node.
        - user_id (str): The unique identifier for the user.
        - memory_type (str): The type of memory node to unlink ("SemanticMemory" or "PublicMemory").

        Raises:
        - ValueError: If any required data is missing or invalid.
        """

        if not user_id or not isinstance(memory_id, int):
            raise ValueError("Valid User ID and Memory ID are required for unlinking.")

        if topic not in ["SemanticMemory", "PublicMemory"]:
            raise ValueError("The memory_type must be either 'SemanticMemory' or 'PublicMemory'.")

        relationship_type = "HAS_SEMANTIC_MEMORY" if topic == "SemanticMemory" else "HAS_PUBLIC_MEMORY"

        try:
            unlink_cypher = f"""
            MATCH (user:User {{userId: '{user_id}'}})-[r:{relationship_type}]->(memory:{topic}) WHERE id(memory) = {memory_id}
            DELETE r
            """
            self.query(unlink_cypher)
        except Neo4jError as e:
            logging.error(f"Error unlinking {topic} from user: {e}")
            raise

    def link_public_memory_to_user(self, memory_id, user_id):
        # Link an existing Public Memory node to a User node
        link_cypher = f"""
        MATCH (user:User {{userId: '{user_id}'}})
        MATCH (publicMemory:PublicMemory) WHERE id(publicMemory) = {memory_id}
        MERGE (user)-[:HAS_PUBLIC_MEMORY]->(publicMemory)
        """
        self.query(link_cypher)

    def retrieve_node_id_for_memory_type(self, topic: str = 'SemanticMemory'):
        link_cypher = f""" MATCH(publicMemory: {topic})
        RETURN
        id(publicMemory)
        AS
        memoryId """
        node_ids = self.query(link_cypher)
        return node_ids

    # def retrieve_linked_memory_for_user(self, user_id: str, topic: str, relationship_type: str = 'HAS_MEMORY'):
    #     query = f"""
    #         MATCH (user:User {{userId: $user_id}})-[:{relationship_type}]->({topic.lower()}:{topic})
    #         RETURN {topic.lower()}
    #     """
    #     return self.query(query, params={"user_id": user_id})
    #

    #
    # async def link_memory_to_user(self, memory_id: int, user_id: str, relationship_type: str = 'HAS_MEMORY') -> None:
    #     """
    #     Link a memory node to a user with a specified relationship type.
    #
    #     Args:
    #         memory_id (int): The ID of the memory node.
    #         user_id (str): The user ID to link the memory to.
    #         relationship_type (str): The type of relationship.
    #
    #     Raises:
    #         ValueError: If input parameters are invalid.
    #         Neo4jError: If an error occurs during the database operation.
    #     """
    #     if not user_id or not memory_id:
    #         raise ValueError("User ID and Memory ID are required for linking.")
    #
    #     try:
    #         link_cypher = f"""
    #             MATCH (user:User {{userId: '{user_id}'}})
    #             MATCH (memory) WHERE id(memory) = {memory_id}
    #             MERGE (user)-[:{relationship_type}]->(memory)
    #         """
    #         await self.query(link_cypher)
    #     except Neo4jError as e:
    #         logging.error(f"Error linking memory to user: {e}")
    #         raise
    #
    # async def delete_memory_node(self, memory_id: int, memory_type: str) -> None:
    #     """
    #     Delete a memory node of a specified type.
    #
    #     Args:
    #         memory_id (int): The ID of the memory node to delete.
    #         memory_type (str): The type of the memory node.
    #
    #     Raises:
    #         ValueError: If input parameters are invalid.
    #         Neo4jError: If an error occurs during the database operation.
    #     """
    #     if not memory_id or not memory_type:
    #         raise ValueError("Memory ID and Memory Type are required for deletion.")
    #
    #     try:
    #         delete_cypher = f"""
    #             MATCH (memory:{memory_type}) WHERE id(memory) = {memory_id}
    #             DETACH DELETE memory
    #         """
    #         await self.query(delete_cypher)
    #     except Neo4jError as e:
    #         logging.error(f"Error deleting memory node: {e}")
    #         raise
    #
    # async def unlink_memory_from_user(self, memory_id: int, user_id: str, relationship_type: str = 'HAS_MEMORY') -> None:
    #     """
    #     Unlink a memory node from a user.
    #
    #     Args:
    #         memory_id (int): The ID of the memory node.
    #         user_id (str): The user ID to unlink from the memory.
    #         relationship_type (str): The type of relationship.
    #
    #     Raises:
    #         ValueError: If input parameters are invalid.
    #         Neo4jError: If an error occurs during the database operation.
    #     """
    #     if not user_id or not memory_id:
    #         raise ValueError("User ID and Memory ID are required for unlinking.")
    #
    #     try:
    #         unlink_cypher = f"""
    #             MATCH (user:User {{userId: '{user_id}'}})-[r:{relationship_type}]->(memory) WHERE id(memory) = {memory_id}
    #             DELETE r
    #         """
    #         await self.query(unlink_cypher)
    #     except Neo4jError as e:
    #         logging.error(f"Error unlinking memory from user: {e}")
    #         raise
    #

    #
    # def create_public_memory(self, labels, topic=None):
    #     if topic is None:
    #         topic = "SerbianArchitecture"
    #     topicMemoryId = topic + "MemoryId"
    #     # Create an independent Architecture Memory node with countries as properties
    #     label_list = ', '.join(f"'{label}'" for label in labels)  # Prepare countries list as a string
    #     architecture_memory_cypher = f"""
    #         CREATE ({topic.lower()}:{topic} {{description: '{topic}', label: [{label_list}]}})
    #         RETURN id({topic.lower()}) AS {topicMemoryId}
    #     """
    #     return self.query(architecture_memory_cypher)
    #
    # def link_public_memory_to_user(self, public_memory_id, user_id):
    #     # Link an existing Public Memory node to a User node
    #     link_cypher = f"""
    #         MATCH (user:User {{userId: '{user_id}'}})
    #         MATCH (publicMemory:PublicMemory) WHERE id(publicMemory) = {public_memory_id}
    #         MERGE (user)-[:HAS_PUBLIC_MEMORY]->(publicMemory)
    #     """
    #     self.query(link_cypher)
    #
    # def link_public_memory_to_architecture(self, public_memory_id):
    #     # Link the Public Memory node to the Architecture Memory node
    #     link_cypher = f"""
    #         MATCH (publicMemory:PublicMemory) WHERE id(publicMemory) = {public_memory_id}
    #         MATCH (architecture:Architecture {{description: 'Architecture'}})
    #         MERGE (publicMemory)-[:INCLUDES]->(architecture)
    #     """
    #     self.query(link_cypher)
    #
    # def delete_public_memory(self, public_memory_id):
    #     # Delete a Public Memory node by its ID
    #     delete_cypher = f"""
    #         MATCH (publicMemory:PublicMemory) WHERE id(publicMemory) = {public_memory_id}
    #         DETACH DELETE publicMemory
    #     """
    #     self.query(delete_cypher)
    #
    # def delete_architecture_memory(self, architecture_memory_id):
    #     # Delete an Architecture Memory node by its ID
    #     delete_cypher = f"""
    #         MATCH (architecture:Architecture) WHERE id(architecture) = {architecture_memory_id}
    #         DETACH DELETE architecture
    #     """
    #     self.query(delete_cypher)
    #
    # def unlink_public_memory_from_user(self, public_memory_id, user_id):
    #     # Unlink a Public Memory node from a User node
    #     unlink_cypher = f"""
    #         MATCH (user:User {{userId: '{user_id}'}})-[r:HAS_PUBLIC_MEMORY]->(publicMemory:PublicMemory) WHERE id(publicMemory) = {public_memory_id}
    #         DELETE r
    #     """
    #     self.query(unlink_cypher)
    #
    # def unlink_public_memory_from_architecture(self, public_memory_id):
    #     # Unlink the Public Memory node from the Architecture Memory node
    #     unlink_cypher = f"""
    #         MATCH (publicMemory:PublicMemory)-[r:INCLUDES]->(architecture:Architecture) WHERE id(publicMemory) = {public_memory_id}
    #         DELETE r
    #     """
    #     self.query(unlink_cypher)


class NetworkXGraphDB:
    def __init__(self, filename='networkx_graph.pkl'):
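A brief sketch of the intended flow for the new memory-node helpers, as suggested by the methods above; an async context is assumed for create_memory_node and the user id is a placeholder:

memory_id = await graph.create_memory_node(labels=["sr"], topic="PublicMemory")
graph.link_public_memory_to_user(memory_id=memory_id, user_id="user_123")
graph.unlink_memory_from_user(memory_id=memory_id, user_id="user_123", topic="PublicMemory")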
@ -12,6 +12,7 @@ class DocsModel(Base):
    operation_id = Column(String, ForeignKey('operations.id'), index=True)
    doc_name = Column(String, nullable=True)
    graph_summary = Column(Boolean, nullable=True)
    memory_category = Column(String, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, onupdate=datetime.utcnow)
@ -12,6 +12,7 @@ class MemoryModel(Base):
    user_id = Column(String, ForeignKey('users.id'), index=True)
    operation_id = Column(String, ForeignKey('operations.id'), index=True)
    memory_name = Column(String, nullable=True)
    memory_category = Column(String, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, onupdate=datetime.utcnow)
    methods_list = Column(String , nullable=True)
@ -1,7 +1,4 @@
from langchain.document_loaders import PyPDFLoader
import sys, os

from cognitive_architecture.shared.chunk_strategy import ChunkStrategy
from cognitive_architecture.database.vectordb.chunkers.chunk_strategy import ChunkStrategy
import re
def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_overlap=None):
@ -32,10 +29,13 @@ def vanilla_chunker(source_data, chunk_size=100, chunk_overlap=20):
        chunk_overlap=chunk_overlap,
        length_function=len
    )
    try:
        pages = text_splitter.create_documents([source_data])
    except:
        pages = text_splitter.create_documents(source_data.content)
    # try:
    #     pages = text_splitter.create_documents([source_data])
    # except:
    #     try:
    pages = text_splitter.create_documents([source_data])
    # except:
    #     pages = text_splitter.create_documents(source_data.content)
    # pages = source_data.load_and_split()
    return pages
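A small usage sketch of the chunker entry point, passing the same string strategy value the loaders use; whether other strategy values are supported here is an assumption:

from cognitive_architecture.database.vectordb.chunkers.chunkers import chunk_data

pages = chunk_data(chunk_strategy="VANILLA", source_data="Some long source text ...",
                   chunk_size=100, chunk_overlap=20)
print(len(pages), pages[0].page_content)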
151 cognitive_architecture/database/vectordb/cognee_manager.py Normal file
@ -0,0 +1,151 @@
import os
import requests
import json
from .embeddings import Embeddings
from .vector_db import VectorDB
from .response import Response


class CogneeManager:
    def __init__(self, embeddings: Embeddings = None,
                 vector_db: VectorDB = None,
                 vector_db_key: str = None,
                 embedding_api_key: str = None,
                 webhook_url: str = None,
                 lines_per_batch: int = 1000,
                 webhook_key: str = None,
                 document_id: str = None,
                 chunk_validation_url: str = None,
                 internal_api_key: str = "test123",
                 base_url="http://localhost:8000"):
        self.embeddings = embeddings if embeddings else Embeddings()
        self.vector_db = vector_db if vector_db else VectorDB()
        self.webhook_url = webhook_url
        self.lines_per_batch = lines_per_batch
        self.webhook_key = webhook_key
        self.document_id = document_id
        self.chunk_validation_url = chunk_validation_url
        self.vector_db_key = vector_db_key
        self.embeddings_api_key = embedding_api_key
        self.internal_api_key = internal_api_key
        self.base_url = base_url

    def serialize(self):
        data = {
            'EmbeddingsMetadata': json.dumps(self.embeddings.serialize()),
            'VectorDBMetadata': json.dumps(self.vector_db.serialize()),
            'WebhookURL': self.webhook_url,
            'LinesPerBatch': self.lines_per_batch,
            'DocumentID': self.document_id,
            'ChunkValidationURL': self.chunk_validation_url,
        }
        return {k: v for k, v in data.items() if v is not None}

    def upload(self, file_paths: list[str], base_url=None):
        if base_url:
            url = base_url + "/jobs"
        else:
            url = self.base_url + "/jobs"

        data = self.serialize()
        headers = self.generate_headers()
        multipart_form_data = [('file', (os.path.basename(filepath), open(filepath, 'rb'), 'application/octet-stream'))
                               for filepath in file_paths]

        print(f"embedding {len(file_paths)} documents at {url}")
        response = requests.post(url, files=multipart_form_data, headers=headers, stream=True, data=data)

        if response.status_code == 500:
            print(response.text)
            return Response(error=response.text, status_code=response.status_code)

        response_json = response.json()
        if response.status_code >= 400 and response.status_code < 500:
            print(f"Error: {response_json['error']}")

        return Response.from_json(response_json, response.status_code)

    def get_job_statuses(self, job_ids: list[int], base_url=None):
        if base_url:
            url = base_url + "/jobs/status"
        else:
            url = self.base_url + "/jobs/status"

        headers = {
            "Authorization": self.internal_api_key,
        }

        data = {
            'JobIDs': job_ids
        }

        print(f"retrieving job statuses for {len(job_ids)} jobs at {url}")
        response = requests.post(url, headers=headers, json=data)

        if response.status_code == 500:
            print(response.text)
            return Response(error=response.text, status_code=response.status_code)

        response_json = response.json()
        if response.status_code >= 400 and response.status_code < 500:
            print(f"Error: {response_json['error']}")

        return Response.from_json(response_json, response.status_code)

    def embed(self, filepath, base_url=None):
        if base_url:
            url = base_url + "/embed"
        else:
            url = self.base_url + "/embed"

        data = self.serialize()
        headers = self.generate_headers()

        files = {
            'SourceData': open(filepath, 'rb')
        }

        print(f"embedding document at file path {filepath} at {url}")
        response = requests.post(url, headers=headers, data=data, files=files)

        if response.status_code == 500:
            print(response.text)
            return Response(error=response.text, status_code=response.status_code)

        response_json = response.json()
        if response.status_code >= 400 and response.status_code < 500:
            print(f"Error: {response_json['error']}")

        return Response.from_json(response_json, response.status_code)

    def get_job_status(self, job_id, base_url=None):
        if base_url:
            url = base_url + "/jobs/" + str(job_id) + "/status"
        else:
            url = self.base_url + "/jobs/" + str(job_id) + "/status"

        headers = {
            "Authorization": self.internal_api_key,
        }

        print(f"retrieving job status for job {job_id} at {url}")
        response = requests.get(url, headers=headers)

        if response.status_code == 500:
            print(response.text)
            return Response(error=response.text, status_code=response.status_code)

        response_json = response.json()
        if response.status_code >= 400 and response.status_code < 500:
            print(f"Error: {response_json['error']}")

        return Response.from_json(response_json, response.status_code)

    def generate_headers(self):
        headers = {
            "Authorization": self.internal_api_key,
            "X-EmbeddingAPI-Key": self.embeddings_api_key,
            "X-VectorDB-Key": self.vector_db_key,
            "X-Webhook-Key": self.webhook_key
        }
        return {k: v for k, v in headers.items() if v is not None}
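A hedged sketch of driving the new manager against a locally running job service; the default base_url and test API key come from the constructor above, and the keys and file path are placeholders:

manager = CogneeManager(embedding_api_key="sk-...", vector_db_key="qdrant-key")
upload_response = manager.upload(file_paths=[".data/Pravilnik-o-energetskoj-efikasnosti-zgrada.pdf"])
statuses = manager.get_job_statuses(job_ids=[1, 2])
print(upload_response, statuses)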
@ -0,0 +1,29 @@
from .embeddings_type import EmbeddingsType
from ..chunkers.chunk_strategy import ChunkStrategy


class Embeddings:
    def __init__(self, embeddings_type: EmbeddingsType = EmbeddingsType.OPEN_AI,
                 chunk_size: int = 256,
                 chunk_overlap: int = 128,
                 chunk_strategy: ChunkStrategy = ChunkStrategy.EXACT,
                 docker_image: str = None,
                 hugging_face_model_name: str = None):
        self.embeddings_type = embeddings_type
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.chunk_strategy = chunk_strategy
        self.docker_image = docker_image
        self.hugging_face_model_name = hugging_face_model_name

    def serialize(self):
        data = {
            'embeddings_type': self.embeddings_type.name if self.embeddings_type else None,
            'chunk_size': self.chunk_size,
            'chunk_overlap': self.chunk_overlap,
            'chunk_strategy': self.chunk_strategy.name if self.chunk_strategy else None,
            'docker_image': self.docker_image,
            'hugging_face_model_name': self.hugging_face_model_name
        }

        return {k: v for k, v in data.items() if v is not None}
@ -0,0 +1,8 @@
from enum import Enum

class EmbeddingsType(Enum):
    OPEN_AI = 'open_ai'
    COHERE = 'cohere'
    SELF_HOSTED = 'self_hosted'
    HUGGING_FACE = 'hugging_face'
    IMAGE = 'image'
18 cognitive_architecture/database/vectordb/job.py Normal file
@ -0,0 +1,18 @@
class Job:
    def __init__(self, job_id, job_status=None, filename=None):
        self.job_id = job_id
        self.job_status = job_status
        self.filename = filename

    def __str__(self):
        attributes = []
        if self.job_id is not None:
            attributes.append(f"job_id: {self.job_id}")
        if self.job_status is not None:
            attributes.append(f"job_status: {self.job_status}")
        if self.filename is not None:
            attributes.append(f"filename: {self.filename}")
        return "Job(" + ", ".join(attributes) + ")"

    def __repr__(self):
        return self.__str__()
@ -4,6 +4,7 @@ import os
import sys

from cognitive_architecture.database.vectordb.chunkers.chunkers import chunk_data
from cognitive_architecture.shared.language_processing import translate_text, detect_language

from langchain.document_loaders import UnstructuredURLLoader
from langchain.document_loaders import DirectoryLoader
@ -11,14 +12,45 @@ import logging
import os
from langchain.document_loaders import TextLoader
import requests
async def _document_loader( observation: str, loader_settings: dict):


async def fetch_pdf_content(file_url):
    response = requests.get(file_url)
    pdf_stream = BytesIO(response.content)
    with fitz.open(stream=pdf_stream, filetype='pdf') as doc:
        return "".join(page.get_text() for page in doc)

async def fetch_text_content(file_url):
    loader = UnstructuredURLLoader(urls=file_url)
    return loader.load()

async def process_content(content, metadata, loader_strategy, chunk_size, chunk_overlap):
    pages = chunk_data(chunk_strategy=loader_strategy, source_data=content, chunk_size=chunk_size,
                       chunk_overlap=chunk_overlap)

    if metadata is None:
        metadata = {"metadata": "None"}

    chunk_count= 0

    for chunk in pages:
        chunk_count+=1
        chunk.metadata = metadata
        chunk.metadata["chunk_count"]=chunk_count
    if detect_language(pages) != "en":
        logging.info("Translating Page")
        for page in pages:
            if detect_language(page.page_content) != "en":
                page.page_content = translate_text(page.page_content)

    return pages

async def _document_loader(observation: str, loader_settings: dict):
    document_format = loader_settings.get("format", "text")
    loader_strategy = loader_settings.get("strategy", "VANILLA")
    chunk_size = loader_settings.get("chunk_size", 500)
    chunk_overlap = loader_settings.get("chunk_overlap", 20)


    logging.info("LOADER SETTINGS %s", loader_settings)

    list_of_docs = loader_settings["path"]
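A short sketch of invoking the refactored loader for the bulk DEVICE path used by the new API endpoint, assuming it is awaited from async code; the settings keys mirror those read above:

loader_settings = {
    "format": "PDF",
    "source": "DEVICE",
    "path": [".data"],
    "bulk_load": True,
}
chunked_docs = await _document_loader(observation="", loader_settings=loader_settings)
print(len(chunked_docs))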
@ -27,55 +59,146 @@ async def _document_loader( observation: str, loader_settings: dict):
|
|||
if loader_settings.get("source") == "URL":
|
||||
for file in list_of_docs:
|
||||
if document_format == "PDF":
|
||||
logging.info("File is %s", file)
|
||||
pdf_response = requests.get(file)
|
||||
pdf_stream = BytesIO(pdf_response.content)
|
||||
with fitz.open(stream=pdf_stream, filetype='pdf') as doc:
|
||||
file_content = ""
|
||||
for page in doc:
|
||||
file_content += page.get_text()
|
||||
pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap)
|
||||
|
||||
chunked_doc.append(pages)
|
||||
|
||||
content = await fetch_pdf_content(file)
|
||||
elif document_format == "TEXT":
|
||||
loader = UnstructuredURLLoader(urls=file)
|
||||
file_content = loader.load()
|
||||
pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap)
|
||||
chunked_doc.append(pages)
|
||||
content = await fetch_text_content(file)
|
||||
else:
|
||||
raise ValueError(f"Unsupported document format: {document_format}")
|
||||
|
||||
pages = await process_content(content, metadata=None, loader_strategy=loader_strategy, chunk_size= chunk_size, chunk_overlap= chunk_overlap)
|
||||
chunked_doc.append(pages)
|
||||
|
||||
elif loader_settings.get("source") == "DEVICE":
|
||||
|
||||
current_directory = os.getcwd()
|
||||
logging.info("Current Directory: %s", current_directory)
|
||||
|
||||
loader = DirectoryLoader(".data", recursive=True)
|
||||
if document_format == "PDF":
|
||||
# loader = SimpleDirectoryReader(".data", recursive=True, exclude_hidden=True)
|
||||
if loader_settings.get("bulk_load", False) == True:
|
||||
current_directory = os.getcwd()
|
||||
logging.info("Current Directory: %s", current_directory)
|
||||
loader = DirectoryLoader(".data", recursive=True)
|
||||
documents = loader.load()
|
||||
pages = chunk_data(chunk_strategy=loader_strategy, source_data=str(documents), chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap)
|
||||
logging.info("Documents: %s", documents)
|
||||
# pages = documents.load_and_split()
|
||||
chunked_doc.append(pages)
|
||||
|
||||
|
||||
elif document_format == "TEXT":
|
||||
documents = loader.load()
|
||||
pages = chunk_data(chunk_strategy=loader_strategy, source_data=str(documents), chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap)
|
||||
logging.info("Documents: %s", documents)
|
||||
# pages = documents.load_and_split()
|
||||
chunked_doc.append(pages)
|
||||
for document in documents:
|
||||
# print ("Document: ", document.page_content)
|
||||
pages = await process_content(content= str(document.page_content), metadata=document.metadata, loader_strategy= loader_strategy, chunk_size = chunk_size, chunk_overlap = chunk_overlap)
|
||||
chunked_doc.append(pages)
|
||||
else:
|
||||
from langchain.document_loaders import PyPDFLoader
|
||||
loader = PyPDFLoader(loader_settings.get("single_document_path"))
|
||||
documents= loader.load()
|
||||
|
||||
for document in documents:
|
||||
pages = await process_content(content=str(document.page_content), metadata=document.metadata,
|
||||
loader_strategy=loader_strategy, chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap)
|
||||
chunked_doc.append(pages)
|
||||
else:
|
||||
raise ValueError(f"Error: ")
|
||||
raise ValueError(f"Unsupported source type: {loader_settings.get('source')}")
|
||||
|
||||
return chunked_doc
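
A minimal usage sketch of the refactored loader above, assuming `_document_loader` and its imports are available and that a `.data` directory with PDFs exists; the settings keys mirror the ones the function reads and the values are placeholders.

# Illustrative sketch only: drive _document_loader with the same settings keys it reads above.
import asyncio

async def demo_loader():
    loader_settings = {
        "format": "PDF",
        "source": "DEVICE",
        "path": [".data"],
        "strategy": "VANILLA",
        "chunk_size": 500,
        "chunk_overlap": 20,
    }
    chunked_doc = await _document_loader(observation="", loader_settings=loader_settings)
    # chunked_doc is a list of page lists; each page carries metadata with a chunk_count
    for pages in chunked_doc:
        for page in pages:
            print(page.metadata.get("chunk_count"), page.page_content[:80])

# asyncio.run(demo_loader())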
72
cognitive_architecture/database/vectordb/response.py
Normal file
@@ -0,0 +1,72 @@
from .job import Job


class Response:
    def __init__(self, error=None, message=None, successful_uploads=None, failed_uploads=None,
                 empty_files_count=None, duplicate_files_count=None, job_id=None,
                 jobs=None, job_status=None, status_code=None):
        self.error = error
        self.message = message
        self.successful_uploads = successful_uploads
        self.failed_uploads = failed_uploads
        self.empty_files_count = empty_files_count
        self.duplicate_files_count = duplicate_files_count
        self.job_id = job_id
        self.jobs = jobs
        self.job_status = job_status
        self.status_code = status_code

    @classmethod
    def from_json(cls, json_dict, status_code):
        successful_uploads = cls._convert_successful_uploads_to_jobs(json_dict.get('successful_uploads', None))
        jobs = cls._convert_to_jobs(json_dict.get('Jobs', None))

        return cls(
            error=json_dict.get('error'),
            message=json_dict.get('message'),
            successful_uploads=successful_uploads,
            failed_uploads=json_dict.get('failed_uploads'),
            empty_files_count=json_dict.get('empty_files_count'),
            duplicate_files_count=json_dict.get('duplicate_files_count'),
            job_id=json_dict.get('JobID'),
            jobs=jobs,
            job_status=json_dict.get('JobStatus'),
            status_code=status_code
        )

    @classmethod
    def _convert_successful_uploads_to_jobs(cls, successful_uploads):
        if not successful_uploads:
            return None
        return [Job(filename=key, job_id=val) for key, val in successful_uploads.items()]

    @classmethod
    def _convert_to_jobs(cls, jobs):
        if not jobs:
            return None
        return [Job(job_id=job['JobID'], job_status=job['JobStatus']) for job in jobs]

    def __str__(self):
        attributes = []
        if self.error is not None:
            attributes.append(f"error: {self.error}")
        if self.message is not None:
            attributes.append(f"message: {self.message}")
        if self.successful_uploads is not None:
            attributes.append(f"successful_uploads: {str(self.successful_uploads)}")
        if self.failed_uploads is not None:
            attributes.append(f"failed_uploads: {self.failed_uploads}")
        if self.empty_files_count is not None:
            attributes.append(f"empty_files_count: {self.empty_files_count}")
        if self.duplicate_files_count is not None:
            attributes.append(f"duplicate_files_count: {self.duplicate_files_count}")
        if self.job_id is not None:
            attributes.append(f"job_id: {self.job_id}")
        if self.jobs is not None:
            attributes.append(f"jobs: {str(self.jobs)}")
        if self.job_status is not None:
            attributes.append(f"job_status: {self.job_status}")
        if self.status_code is not None:
            attributes.append(f"status_code: {self.status_code}")

        return "Response(" + ", ".join(attributes) + ")"
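
A small sketch of how the new Response wrapper could be populated; the payload dict below is made up for illustration and only uses the JSON keys read by from_json above.

# Hypothetical payload; keys mirror those read by Response.from_json above.
payload = {
    "message": "Files received",
    "successful_uploads": {"report.pdf": "job-123"},
    "failed_uploads": [],
    "empty_files_count": 0,
    "duplicate_files_count": 1,
    "Jobs": [{"JobID": "job-123", "JobStatus": "PENDING"}],
}

response = Response.from_json(payload, status_code=200)
print(response)  # Response(message: Files received, successful_uploads: ..., status_code: 200)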
13
cognitive_architecture/database/vectordb/vector_db_type.py
Normal file
@@ -0,0 +1,13 @@
from enum import Enum


class VectorDBType(Enum):
    PINECONE = 'pinecone'
    WEAVIATE = 'weaviate'
    MILVUS = 'milvus'
    QDRANT = 'qdrant'
    DEEPLAKE = 'deeplake'
    VESPA = 'vespa'
    PGVECTOR = 'pgvector'
    REDIS = 'redis'
    LANCEDB = 'lancedb'
    MONGODB = 'mongodb'
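
A short sketch of how the enum could gate vector-store selection; the resolve helper below is hypothetical and only illustrates matching configuration strings against VectorDBType values.

# Hypothetical dispatch helper built on the enum above.
def resolve_vector_db(name: str) -> VectorDBType:
    try:
        return VectorDBType(name.lower())
    except ValueError:
        raise ValueError(f"Unsupported vector database: {name}")

db_type = resolve_vector_db("Weaviate")
if db_type is VectorDBType.WEAVIATE:
    print("Using the Weaviate-backed memory store")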
@@ -130,8 +130,6 @@ class WeaviateVectorDB(VectorDB):

def _stuct(self, observation, params, metadata_schema_class =None):
"""Utility function to create the document structure with optional custom fields."""

# Construct document data
document_data = {
"metadata": params,

@@ -152,20 +150,22 @@ class WeaviateVectorDB(VectorDB):
if namespace is None:
namespace = self.namespace
params['user_id'] = self.user_id
logging.info("User id is %s", self.user_id)
retriever = self.init_weaviate(embeddings=OpenAIEmbeddings(),namespace = namespace, retriever_type="single_document_context")
if loader_settings:
# Assuming _document_loader returns a list of documents
documents = await _document_loader(observation, loader_settings)
logging.info("here are the docs %s", str(documents))
chunk_count = 0
for doc in documents[0]:
chunk_count += 1
params['chunk_order'] = chunk_count
# document_to_load = self._stuct(doc.page_content, params, metadata_schema_class)

# logging.info("Loading document with provided loader settings %s", str(document_to_load))
retriever.add_documents([
Document(metadata=params, page_content=doc.page_content)])
for doc_list in documents:
for doc in doc_list:
chunk_count += 1
params['chunk_count'] = doc.metadata.get("chunk_count", "None")
logging.info("Loading document with provided loader settings %s", str(doc))
params['source'] = doc.metadata.get("source", "None")
logging.info("Params are %s", str(params))
retriever.add_documents([
Document(metadata=params, page_content=doc.page_content)])
else:
chunk_count = 0
from cognitive_architecture.database.vectordb.chunkers.chunkers import chunk_data

@@ -174,16 +174,13 @@ class WeaviateVectorDB(VectorDB):
for doc in documents[0]:
chunk_count += 1
params['chunk_order'] = chunk_count

# document_to_load = self._stuct(observation, params, metadata_schema_class)

logging.info("Loading document with defautl loader settings %s", str(doc))

# logging.info("Loading document with defautl loader settings %s", str(document_to_load))
params['source'] = "User loaded"
logging.info("Loading document with default loader settings %s", str(doc))
logging.info("Params are %s", str(params))
retriever.add_documents([
Document(metadata=params, page_content=doc.page_content)])

async def fetch_memories(self, observation: str, namespace: str = None, search_type: str = 'hybrid', **kwargs):
async def fetch_memories(self, observation: str, namespace: str = None, search_type: str = 'hybrid',params=None, **kwargs):
"""
Fetch documents from weaviate.

@@ -276,6 +273,33 @@ class WeaviateVectorDB(VectorDB):
).with_additional(
["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", 'distance']
).with_where(filter_object).with_limit(30)
query_output = (
base_query
# .with_hybrid(query=observation, fusion_type=HybridFusion.RELATIVE_SCORE)
.do()
)

elif search_type == 'summary_filter_by_object_name':
filter_object = {
"operator": "And",
"operands": [
{
"path": ["user_id"],
"operator": "Equal",
"valueText": self.user_id,
},
{
"path": ["doc_id"],
"operator": "Equal",
"valueText": params,
},
]
}
base_query = client.query.get(
namespace, list(list_objects_of_class(namespace, client.schema.get()))
).with_additional(
["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", 'distance']
).with_where(filter_object).with_limit(30).with_hybrid(query=observation, fusion_type=HybridFusion.RELATIVE_SCORE)
query_output = (
base_query
.do()
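
A hedged sketch of calling the extended fetch_memories with the new search type; the doc_id and query values are placeholders and the call assumes an already initialized WeaviateVectorDB instance.

# Sketch only: restrict hybrid search to a single document by its Postgres doc_id.
async def fetch_single_document_chunks(vector_db, doc_id: str, query: str):
    return await vector_db.fetch_memories(
        observation=query,
        search_type="summary_filter_by_object_name",
        params=doc_id,  # compared against the "doc_id" field in the where-filter above
    )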
92
cognitive_architecture/shared/language_processing.py
Normal file
@@ -0,0 +1,92 @@
import boto3
from botocore.exceptions import BotoCoreError, ClientError
from langdetect import detect, LangDetectException
import iso639
from dotenv import load_dotenv
load_dotenv()

import logging

# Basic configuration of the logging system
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def detect_language(text):
    """
    Detect the language of the given text and return its ISO 639-1 language code.
    If the detected language is Croatian ('hr'), it maps to Serbian ('sr').
    The text is trimmed to the first 100 characters for efficient processing.

    Parameters:
    text (str): The text for language detection.

    Returns:
    str: The ISO 639-1 language code of the detected language, or 'None' in case of an error.
    """

    # Trim the text to the first 100 characters
    trimmed_text = text[:100]

    try:
        # Detect the language using langdetect
        detected_lang_iso639_1 = detect(trimmed_text)
        logging.info(f"Detected ISO 639-1 code: {detected_lang_iso639_1}")

        # Special case: map 'hr' (Croatian) to 'sr' (Serbian ISO 639-2)
        if detected_lang_iso639_1 == 'hr':
            return 'sr'
        return detected_lang_iso639_1

    except LangDetectException as e:
        logging.error(f"Language detection error: {e}")
    except Exception as e:
        logging.error(f"Unexpected error: {e}")

    return -1


def translate_text(text, source_language:str='sr', target_language:str='en', region_name='eu-west-1'):
    """
    Translate text from source language to target language using AWS Translate.

    Parameters:
    text (str): The text to be translated.
    source_language (str): The source language code (e.g., 'sr' for Serbian). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
    target_language (str): The target language code (e.g., 'en' for English). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php
    region_name (str): AWS region name.

    Returns:
    str: Translated text or an error message.
    """
    if not text:
        return "No text provided for translation."

    if not source_language or not target_language:
        return "Both source and target language codes are required."

    try:
        translate = boto3.client(service_name='translate', region_name=region_name, use_ssl=True)
        result = translate.translate_text(Text=text, SourceLanguageCode=source_language, TargetLanguageCode=target_language)
        return result.get('TranslatedText', 'No translation found.')

    except BotoCoreError as e:
        logging.info(f"BotoCoreError occurred: {e}")
        return "Error with AWS Translate service configuration or request."

    except ClientError as e:
        logging.info(f"ClientError occurred: {e}")
        return "Error with AWS client or network issue."


source_language = 'sr'
target_language = 'en'
text_to_translate = "Ja volim da pecam i idem na reku da šetam pored nje ponekad"

translated_text = translate_text(text_to_translate, source_language, target_language)
print(translated_text)

# print(detect_language("Koliko krava ide u setnju?"))
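
A short sketch of the detect-then-translate flow these helpers enable; it assumes valid AWS credentials for the Translate client and simply passes English text through unchanged.

# Sketch: normalize arbitrary page text to English before chunking and embedding.
def normalize_to_english(page_text: str) -> str:
    language = detect_language(page_text)
    if language in ("en", -1):  # already English, or detection failed
        return page_text
    return translate_text(page_text, source_language=language, target_language="en")

# normalize_to_english("Koliko krava ide u setnju?")  # -> English text via AWS Translate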
@@ -7,6 +7,11 @@ from graphviz import Digraph
from sqlalchemy import or_
from sqlalchemy.orm import contains_eager

from cognitive_architecture.database.postgres.database import AsyncSessionLocal
from dotenv import load_dotenv

load_dotenv()

# from graph_database.graph import KnowledgeGraph

@@ -217,10 +222,11 @@ async def get_unsumarized_vector_db_namespace(session: AsyncSession, user_id: st
operations = result.unique().scalars().all()

# Extract memory names and document names and IDs
memory_names = [memory.memory_name for op in operations for memory in op.memories]
# memory_names = [memory.memory_name for op in operations for memory in op.memories]
memory_details = [(memory.memory_name, memory.memory_category) for op in operations for memory in op.memories]
docs = [(doc.doc_name, doc.id) for op in operations for doc in op.docs]

return memory_names, docs
return memory_details, docs

# except Exception as e:
# # Handle the exception as needed

@@ -264,4 +270,42 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):

#
# async def main():
# user_id = "user"
#
# async with session_scope(AsyncSessionLocal()) as session:
# output = await get_unsumarized_vector_db_namespace(session, user_id)
#
# print(output)
# # await update_entity(session, DocsModel, "8cd9a022-5a7a-4af5-815a-f988415536ae", True)
# # out = await get_vectordb_namespace(session, user_id)
# # params = {
# # "version": "1.0",
# # "agreement_id": "AG123456",
# # "privacy_policy": "https://example.com/privacy",
# # "terms_of_service": "https://example.com/terms",
# # "format": "json",
# # "schema_version": "1.1",
# # "checksum": "a1b2c3d4e5f6",
# # "owner": "John Doe",
# # "license": "MIT",
# # "validity_start": "2023-08-01",
# # "validity_end": "2024-07-31",
# # }
# # loader_settings = {
# # "format": "PDF",
# # "source": "DEVICE",
# # "path": [".data"],
# # "strategy": "SUMMARY",
# # }
# # await load_documents_to_vectorstore(session, user_id, loader_settings=loader_settings)
# # await user_query_to_graph_db(session, user_id, "I walked in the forest yesterday and added to my list I need to buy some milk in the store and get a summary from a classical book i read yesterday")
# # await add_documents_to_graph_db(session, user_id, loader_settings=loader_settings)
# # await user_context_enrichment(session, user_id, query="Tell me about the book I read yesterday")
#
#
# if __name__ == "__main__":
# import asyncio
#
# asyncio.run(main())
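
A small sketch of what the updated return shape enables: each memory is now a (name, category) pair, so callers can filter namespaces by category before graph ingestion; the values below are illustrative.

memory_details = [("ABCDEF_SEMANTICMEMORY", "PUBLIC"), ("GHIJKL_SEMANTICMEMORY", "PRIVATE")]
document_memory_types = ["PUBLIC"]

filtered = [detail for detail in memory_details if detail[1] in document_memory_types]
print(filtered)  # [('ABCDEF_SEMANTICMEMORY', 'PUBLIC')]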
@@ -251,7 +251,7 @@ class Memory:
return f"Error creating user: {str(e)}"

@staticmethod
async def handle_new_memory(user_id: str, session, job_id: str = None, memory_name: str = None):
async def handle_new_memory(user_id: str, session, job_id: str = None, memory_name: str = None, memory_category:str='PUBLIC'):
"""
Handle new memory creation associated with a user.

@@ -275,6 +275,7 @@ class Memory:
user_id=user_id,
operation_id=job_id,
memory_name=memory_name,
memory_category=memory_category,
methods_list=str(["Memory", "SemanticMemory", "EpisodicMemory"]),
attributes_list=str(
[
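
A hedged sketch of the new memory_category argument in use; the session is expected to come from the existing session_scope helper and the namespace value is a placeholder.

# Sketch: register a public memory record for the shared architecture corpus.
async def register_public_memory(session, user_id: str, job_id: str, namespace_id: str):
    return await Memory.handle_new_memory(
        user_id=user_id,
        session=session,
        job_id=job_id,
        memory_name=namespace_id,
        memory_category="PUBLIC",  # new argument; defaults to 'PUBLIC' when omitted
    )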
488
main.py
@@ -1,4 +1,6 @@
from typing import Optional, List

from neo4j.exceptions import Neo4jError
from pydantic import BaseModel, Field
from cognitive_architecture.database.graph_database.graph import Neo4jGraphDB
from cognitive_architecture.database.postgres.models.memory import MemoryModel

@@ -37,7 +39,9 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from cognitive_architecture.utils import get_document_names, generate_letter_uuid, get_memory_name_by_doc_id, get_unsumarized_vector_db_namespace, get_vectordb_namespace, get_vectordb_document_name

async def fetch_document_vectordb_namespace(session: AsyncSession, user_id: str, namespace_id:str):

async def fetch_document_vectordb_namespace(session: AsyncSession, user_id: str, namespace_id:str, doc_id:str=None):
logging.info("User id is %s", user_id)
memory = await Memory.create_memory(user_id, session, namespace=namespace_id, memory_label=namespace_id)

@@ -63,12 +67,14 @@ async def fetch_document_vectordb_namespace(session: AsyncSession, user_id: str,

print("Available memory classes:", await memory.list_memory_classes())
result = await memory.dynamic_method_call(dynamic_memory_class, 'fetch_memories',
observation="placeholder", search_type="summary")
observation="placeholder", search_type="summary_filter_by_object_name", params=doc_id)
logging.info("Result is %s", result)

return result, namespace_id

async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, content:str=None, job_id:str=None, loader_settings:dict=None):

async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, content:str=None, job_id:str=None, loader_settings:dict=None, memory_type:str="PRIVATE"):
namespace_id = str(generate_letter_uuid()) + "_" + "SEMANTICMEMORY"
namespace_class = namespace_id + "_class"

@@ -78,7 +84,6 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
await add_entity(session, new_user)
except:
pass

if job_id is None:
job_id = str(uuid.uuid4())
@@ -92,37 +97,50 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
),
)
memory = await Memory.create_memory(user_id, session, namespace=namespace_id, job_id=job_id, memory_label=namespace_id)

if content is not None:
document_names = [content[:30]]
if loader_settings is not None:
document_names = get_document_names(loader_settings.get("path", "None"))
document_source = loader_settings.get("document_names") if isinstance(loader_settings.get("document_names"),
list) else loader_settings.get("path", "None")
logging.info("Document source is %s", document_source)
# try:
document_names = get_document_names(document_source[0])
logging.info(str(document_names))
# except:
# document_names = document_source
for doc in document_names:
from cognitive_architecture.shared.language_processing import translate_text, detect_language
# translates doc titles to english
if loader_settings is not None:
logging.info("Detecting language of document %s", doc)
loader_settings["single_document_path"]= loader_settings.get("path", "None")[0] +"/"+doc
logging.info("Document path is %s", loader_settings.get("single_document_path", "None"))
memory_category = loader_settings.get("memory_category", "PUBLIC")
if loader_settings is None:
memory_category = "CUSTOM"
if detect_language(doc) != "en":
doc_ = doc.strip(".pdf").replace("-", " ")
doc_ = translate_text(doc_, "sr", "en")
else:
doc_=doc
doc_id = str(uuid.uuid4())

logging.info("Document name is %s", doc_)
await add_entity(
session,
DocsModel(
id=str(uuid.uuid4()),
id=doc_id,
operation_id=job_id,
graph_summary= False,
doc_name=doc
memory_category= memory_category,
doc_name=doc_
)
)
# Managing memory attributes
existing_user = await Memory.check_existing_user(user_id, session)
print("here is the existing user", existing_user)
await memory.manage_memory_attributes(existing_user)
params = {
"version": "1.0",
"agreement_id": "AG123456",
"privacy_policy": "https://example.com/privacy",
"terms_of_service": "https://example.com/terms",
"format": "json",
"schema_version": "1.1",
"checksum": "a1b2c3d4e5f6",
"owner": "John Doe",
"license": "MIT",
"validity_start": "2023-08-01",
"validity_end": "2024-07-31",
"doc_id":doc_id
}
print("Namespace id is %s", namespace_id)
await memory.add_dynamic_memory_class(namespace_id.lower(), namespace_id)

@@ -141,10 +159,7 @@ async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, con
print("Available memory classes:", await memory.list_memory_classes())
result = await memory.dynamic_method_call(dynamic_memory_class, 'add_memories',
observation=content, params=params, loader_settings=loader_settings)

await update_entity(session, Operation, job_id, "SUCCESS")
# return result, namespace_id

async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_input: str):
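
A usage sketch of the extended loader entry point; the settings are illustrative and mirror the device-based PDF flow handled above, with memory_category picked up from loader_settings.

# Sketch: bulk-load device PDFs into a new semantic-memory namespace for the shared system user.
async def ingest_public_documents(session):
    loader_settings = {
        "format": "PDF",
        "source": "DEVICE",
        "path": [".data"],
        "memory_category": "PUBLIC",
    }
    await load_documents_to_vectorstore(
        session,
        user_id="system_user",
        loader_settings=loader_settings,
    )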
@@ -174,37 +189,120 @@ async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_inpu

return result

# async def add_documents_to_graph_db(session: AsyncSession, user_id: Optional[str] = None,
# document_memory_types: Optional[List[str]] = None):
# """ Add documents to a graph database, handling multiple memory types """
# if document_memory_types is None:
# document_memory_types = ['PUBLIC']
#
# memory_type_actions = {
# 'PUBLIC': {'topic': 'PublicMemory', 'additional_action': None},
# 'SEMANTIC': {'topic': 'SemanticMemory', 'additional_action': None}
# }
#
# try:
# memory_details, docs = await get_unsumarized_vector_db_namespace(session, user_id)
# filtered_memory_details = [detail for detail in memory_details if detail[1] in document_memory_types]
#
# neo4j_graph_db = None
# for doc in docs:
# doc_name, doc_id = doc
# try:
# classification_content = await fetch_document_vectordb_namespace(
# session, user_id, filtered_memory_details[0][0], doc_id)
# retrieval_chunks = [item['text'] for item in
# classification_content[0]['data']['Get'][filtered_memory_details[0][0]]]
# except Exception as e:
# logging.error(f"Error fetching document content: {e}")
# retrieval_chunks = ""
#
# concatenated_retrievals = ' '.join(retrieval_chunks)
# classification = await classify_documents(doc_name, document_id=doc_id, content=concatenated_retrievals)
#
# for memory_type in document_memory_types:
# if memory_type in memory_type_actions:
# if neo4j_graph_db is None:
# neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url,
# username=config.graph_database_username,
# password=config.graph_database_password)
# topic = memory_type_actions[memory_type]['topic']
# ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic=topic)
# for id in ids:
# memory_id = id.get('memoryId')
# if memory_id:
# rs = neo4j_graph_db.create_document_node_cypher(classification, user_id,
# public_memory_id=memory_id if memory_type == 'PUBLIC' else None)
# neo4j_graph_db.query(rs)
#
# if filtered_memory_details[0][1] == memory_type:
# neo4j_graph_db.update_document_node_with_db_ids(
# vectordb_namespace=filtered_memory_details[0][0],
# document_id=doc_id, user_id=user_id if memory_type != "PUBLIC" else None)
# except Exception as e:
# logging.error(f"An error occurred: {e}")
# return e


async def add_documents_to_graph_db(session: AsyncSession, user_id: str= None, loader_settings:dict=None):
async def add_documents_to_graph_db(session: AsyncSession, user_id: str= None, document_memory_types:list=None):
"""Add previously loaded documents to the graph database, handling public and semantic memory types."""
if document_memory_types is None:
document_memory_types = ['PUBLIC']

logging.info("Document memory types are %s", document_memory_types)
try:
# await update_document_vectordb_namespace(postgres_session, user_id)
memory_names, docs = await get_unsumarized_vector_db_namespace(session, user_id)
memory_details, docs = await get_unsumarized_vector_db_namespace(session, user_id)

logging.info("Docs are %s", docs)
for doc, memory_name in zip(docs, memory_names):
logging.info("Memory names are %s", memory_name)
classification_content = await fetch_document_vectordb_namespace(session, user_id, memory_name)
logging.info("Classification content is %s", classification_content)

retrieval_chunks = [item['text'] for item in classification_content[0]['data']['Get'][memory_name]]
memory_details= [detail for detail in memory_details if detail[1] in document_memory_types]
logging.info("Memory details %s", memory_details)
for doc in docs:
logging.info("Memory names are %s", memory_details)
doc_name, doc_id = doc
logging.info("Doc id is %s", doc_id)
try:
classification_content = await fetch_document_vectordb_namespace(session, user_id, memory_details[0][0], doc_id)
retrieval_chunks = [item['text'] for item in
classification_content[0]['data']['Get'][memory_details[0][0]]]
logging.info("Classification content is %s", classification_content)
except:
classification_content = ""
retrieval_chunks = ""
# retrieval_chunks = [item['text'] for item in classification_content[0]['data']['Get'][memory_details[0]]]
# Concatenating the extracted text values
concatenated_retrievals = ' '.join(retrieval_chunks)
print(concatenated_retrievals)
doc_name, doc_id = doc
logging.info("Retrieval chunks are %s", retrieval_chunks)
classification = await classify_documents(doc_name, document_id =doc_id, content=concatenated_retrievals)

logging.info("Classification is %s", str(classification))
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
password=config.graph_database_password)
rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
neo4j_graph_db.query(rs, classification)
neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name,
if document_memory_types == ['PUBLIC']:
await create_public_memory(user_id=user_id, labels=['sr'], topic="PublicMemory")
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic="PublicMemory")
print(ids)
else:
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic="SemanticMemory")
print(ids)

for id in ids:
print(id.get('memoryId'))
if document_memory_types == ['PUBLIC']:

rs = neo4j_graph_db.create_document_node_cypher(classification, user_id, public_memory_id=id.get('memoryId'))
else:
rs = neo4j_graph_db.create_document_node_cypher(classification, user_id, memory_type='SemanticMemory')
logging.info("Cypher query is %s", rs)
neo4j_graph_db.query(rs)
logging.info("Document node created; updating vector database ids")
if memory_details[0][1] == "PUBLIC":
neo4j_graph_db.update_document_node_with_db_ids( vectordb_namespace=memory_details[0][0],
document_id=doc_id)
logging.info("Updated graph summary flag for doc %s", doc_id)
await update_entity_graph_summary(session, DocsModel, doc_id, True)
else:
neo4j_graph_db.update_document_node_with_db_ids( vectordb_namespace=memory_details[0][0],
document_id=doc_id, user_id=user_id)
# await update_entity_graph_summary(session, DocsModel, doc_id, True)
except Exception as e:
return e
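
A short sketch of the new call signature: memory types now arrive as a list instead of loader settings, with 'PUBLIC' routing documents under the shared PublicMemory node.

# Sketch: classify previously loaded documents into the graph as public memory.
async def summarize_into_graph(session, user_id: str):
    await add_documents_to_graph_db(
        session,
        user_id=user_id,
        document_memory_types=["PUBLIC"],  # or ["SEMANTIC"] for per-user memory
    )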
@@ -230,11 +328,13 @@ def generate_graph(input) -> ResponseString:
response_model=ResponseString,
)
return out

async def user_context_enrichment(session, user_id:str, query:str)->str:
async def user_context_enrichment(session, user_id:str, query:str, generative_response:bool=False, memory_type:str=None)->str:
"""
Asynchronously enriches the user context by integrating various memory systems and document classifications.

This function uses cognitive architecture to access and manipulate different memory systems (semantic, episodic, and procedural) associated with a user. It fetches memory details from a Neo4j graph database, classifies document categories based on the user's query, and retrieves document IDs for relevant categories. The function also dynamically manages memory attributes and methods, extending the context with document store information to enrich the user's query response.
This function uses cognitive architecture to access and manipulate different memory systems (semantic, episodic, and procedural) associated with a user.
It fetches memory details from a Neo4j graph database, classifies document categories based on the user's query, and retrieves document IDs for relevant categories.
The function also dynamically manages memory attributes and methods, extending the context with document store information to enrich the user's query response.

Parameters:
- session (AsyncSession): The database session for executing queries.

@@ -265,31 +365,40 @@ async def user_context_enrichment(session, user_id:str, query:str)->str:

semantic_mem = neo4j_graph_db.retrieve_semantic_memory(user_id=user_id)
episodic_mem = neo4j_graph_db.retrieve_episodic_memory(user_id=user_id)
context = f""" You are a memory system that uses cognitive architecture to enrich the user context.
You have access to the following information:
EPISODIC MEMORY: {episodic_mem}
SEMANTIC MEMORY: {semantic_mem}
PROCEDURAL MEMORY: NULL
The original user query: {query}
"""
# public_mem = neo4j_graph_db.retrieve_public_memory(user_id=user_id)

logging.info("Context from graphdb is %s", context)
document_categories_query = await neo4j_graph_db.get_document_categories(user_id=user_id)
result = neo4j_graph_db.query(document_categories_query)
categories = [record["category"] for record in result]
logging.info('Possible document categories are', str(categories))
relevant_categories = await classify_call( query= query, context = context, document_types=str(categories))
logging.info("Relevant categories after the classifier are %s", relevant_categories)
from cognitive_architecture.shared.language_processing import translate_text, detect_language

get_doc_ids = await neo4j_graph_db.get_document_ids(user_id, relevant_categories)

postgres_id = neo4j_graph_db.query(get_doc_ids)
if detect_language(query) != "en":
query = translate_text(query, "sr", "en")
logging.info("Translated query is %s", query)

summaries = await neo4j_graph_db.get_memory_linked_document_summaries(user_id=user_id, memory_type=memory_type)

# logging.info("Result is %s", result)
# logging.info("Context from graphdb is %s", context)
# result = neo4j_graph_db.query(document_categories_query)
# summaries = [record.get("summary") for record in result]
# logging.info('Possible document categories are', str(result))
# logging.info('Possible document categories are', str(categories))
relevant_summary = await classify_call( query= query, document_summaries=str(summaries))

# logging.info("Relevant categories after the classifier are %s", relevant_categories)
postgres_id = await neo4j_graph_db.get_memory_linked_document_ids(user_id, summary = relevant_summary, memory_type=memory_type)

# postgres_id = neo4j_graph_db.query(get_doc_ids)
logging.info("Postgres ids are %s", postgres_id)
namespace_id = await get_memory_name_by_doc_id(session, postgres_id[0]["d_id"])
namespace_id = await get_memory_name_by_doc_id(session, postgres_id[0])
logging.info("Namespace ids are %s", namespace_id)
params= {"doc_id":postgres_id[0]}
namespace_id = namespace_id[0]
namespace_class = namespace_id + "_class"
if memory_type =='PublicMemory':
user_id = 'system_user'

memory = await Memory.create_memory(user_id, session, namespace=namespace_id, job_id="23232",
memory_label=namespace_id)

@@ -313,15 +422,151 @@ async def user_context_enrichment(session, user_id:str, query:str)->str:
print(f"No attribute named in memory.")

print("Available memory classes:", await memory.list_memory_classes())
result = await memory.dynamic_method_call(dynamic_memory_class, 'fetch_memories',
observation=query)
context_extension = "Document store information that can help and enrich the anwer is: " + str(result)
entire_context = context + context_extension
final_result = generate_graph(entire_context)
logging.info("Final result is %s", final_result)
results = await memory.dynamic_method_call(dynamic_memory_class, 'fetch_memories',
observation=query, params=postgres_id[0], search_type="summary_filter_by_object_name")
logging.info("Result is %s", str(results))

return final_result.response
search_context = ""

for result in results['data']['Get'][namespace_id]:
# Assuming 'result' is a dictionary and has keys like 'source', 'text'
source = result['source']
text = result['text']
search_context += f"Document source: {source}, Document text: {text} \n"

context = f""" You are a memory system that uses cognitive architecture to enrich the
LLM context and provide better query response.
You have access to the following information:
EPISODIC MEMORY: {episodic_mem}
SEMANTIC MEMORY: {semantic_mem}
PROCEDURAL MEMORY: NULL
SEARCH CONTEXT: The following documents provided with sources they were
extracted from could be used to provide an answer {search_context}
The original user query: {query}
"""
if generative_response is not True:
return context
else:
generative_result = generate_graph(context)
translation_to_srb = translate_text(generative_result.response, "en", "sr")
return translation_to_srb
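
A sketch of the enriched query flow; with generative_response=False the assembled context is returned for downstream prompting, while True also runs generate_graph and translates the answer back to Serbian. The query string is a placeholder.

# Sketch: answer a Serbian regulation question against the shared public memory.
async def answer_regulation_question(session, query: str):
    return await user_context_enrichment(
        session,
        user_id="system_user",
        query=query,
        generative_response=True,
        memory_type="PublicMemory",
    )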
async def create_public_memory(user_id: str=None, labels:list=None, topic:str=None) -> Optional[int]:
"""
Create a public memory node associated with a user in a Neo4j graph database.
If Public Memory exists, it will return the id of the memory.
This is intended as a standalone node that can be attached to any user.
It is not attached to any user by default.

Args:
user_id (str): The unique identifier for the user.
session (AsyncSession): An asynchronous session for database operations.

Returns:
Optional[int]: The ID of the created public memory node or None if an error occurs.
:param labels: Label for the memory, to help filter for different countries
:param topic: Topic for the memory, to help provide a name

"""
# Validate input parameters
if not labels:
labels = ['sr'] # Labels for the memory node

if not topic:
topic = "PublicMemory"

try:
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url,
username=config.graph_database_username,
password=config.graph_database_password)

# Assuming the topic for public memory is predefined, e.g., "PublicMemory"
# Create the memory node
memory_id = await neo4j_graph_db.create_memory_node(labels=labels, topic=topic)
return memory_id
except Neo4jError as e:
logging.error(f"Error creating public memory node: {e}")
return None

async def attach_user_to_memory(user_id: str=None, labels:list=None, topic:str=None) -> Optional[int]:
"""
Link user to public memory

Args:
user_id (str): The unique identifier for the user.
topic (str): Memory name

Returns:
Optional[int]: The ID of the created public memory node or None if an error occurs.
:param labels: Label for the memory, to help filter for different countries
:param topic: Topic for the memory, to help provide a name

"""
# Validate input parameters
if not user_id:
raise ValueError("User ID is required.")
if not labels:
labels = ['sr'] # Labels for the memory node

if not topic:
topic = "PublicMemory"

try:
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url,
username=config.graph_database_username,
password=config.graph_database_password)

# Assuming the topic for public memory is predefined, e.g., "PublicMemory"
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic=topic)

for id in ids:
linked_memory = neo4j_graph_db.link_public_memory_to_user(memory_id=id.get('memoryId'), user_id=user_id)
return 1
except Neo4jError as e:
logging.error(f"Error linking public memory node to user: {e}")
return None

async def unlink_user_from_memory(user_id: str=None, labels:list=None, topic:str=None) -> Optional[int]:
"""
Unlink user from memory

Args:
user_id (str): The unique identifier for the user.
topic (str): Memory name

Returns:
Optional[int]: The ID of the created public memory node or None if an error occurs.
:param labels: Label for the memory, to help filter for different countries
:param topic: Topic for the memory, to help provide a name

"""
# Validate input parameters
if not user_id:
raise ValueError("User ID is required.")
if not labels:
labels = ['sr'] # Labels for the memory node
raise ValueError("Labels are required, default label 'sr' provided")
if not topic:
topic = "PublicMemory"
raise ValueError("Topic is required, default topic 'PublicMemory' provided")

try:
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url,
username=config.graph_database_username,
password=config.graph_database_password)

# Assuming the topic for public memory is predefined, e.g., "PublicMemory"
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic=topic)
for id in ids:
linked_memory = neo4j_graph_db.unlink_memory_from_user(memory_id=id.get('memoryId'), user_id=user_id)
return 1
except Neo4jError as e:
logging.error(f"Error unlinking public memory node from user: {e}")
return None
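
A sketch of the intended lifecycle for the standalone public memory node: create it once, then link or unlink individual users as needed.

# Sketch: provision the shared node and attach a user to it.
async def provision_public_memory(user_id: str):
    memory_id = await create_public_memory(labels=["sr"], topic="PublicMemory")
    if memory_id is not None:
        await attach_user_to_memory(user_id=user_id, labels=["sr"], topic="PublicMemory")
    return memory_id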
@@ -329,7 +574,120 @@ async def main():
user_id = "user"

async with session_scope(AsyncSessionLocal()) as session:
await update_entity(session, DocsModel, "8cd9a022-5a7a-4af5-815a-f988415536ae", True)
# await update_entity(session, DocsModel, "8cd9a022-5a7a-4af5-815a-f988415536ae", True)
# output = await get_unsumarized_vector_db_namespace(session, user_id)


class GraphQLQuery(BaseModel):
query: str

# def cypher_statement_correcting( input: str) -> str:
# out = aclient.chat.completions.create(
# model=config.model,
# temperature=0,
# max_tokens=2000,
# messages=[
# {
# "role": "user",
# "content": f"""Check the cypher query for syntax issues, and fix any if found and return it as is: {input}. """,
#
# },
# {"role": "system", "content": """You are a top-tier algorithm
# designed for checking cypher queries for neo4j graph databases. You have to return input provided to you as is."""}
# ],
# response_model=GraphQLQuery,
# )
# return out
# out = await user_query_to_graph_db(session, user_id, "I walked in the forest yesterday and added to my list I need to buy some milk in the store and get a summary from a classical book i read yesterday")
# print(out)
# load_doc_to_graph = await add_documents_to_graph_db(session, user_id)
# print(load_doc_to_graph)
# user_id = 'user'
# loader_settings = {
# "format": "PDF",
# "source": "DEVICE",
# "path": [".data"]
# }
# await load_documents_to_vectorstore(session, user_id, loader_settings=loader_settings)
# await create_public_memory(user_id=user_id, labels=['sr'], topic="PublicMemory")
# await add_documents_to_graph_db(session, user_id)
#
neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
password=config.graph_database_password)

# await attach_user_to_memory(user_id=user_id, labels=['sr'], topic="PublicMemory")

# return_ = await user_context_enrichment(user_id=user_id, query="Koja je minimalna visina ograde na balkonu na stambenom objektu", session=session)
# print(return_)

# document_summary = {
# 'DocumentCategory': 'Science',
# 'Title': 'The Future of AI',
# 'Summary': 'An insightful article about the advancements in AI.',
# 'd_id': 'doc123'
# }
#
# # Example user ID
# user_id = 'user'
#
# # value = await neo4j_graph_db.create_memory_node(labels=['sr'])
# # print(value)
# # neo4j_graph_db.close()
#
# await add_documents_to_graph_db(session, user_id)
# neo4j_graph_db.link_public_memory_to_user(memory_id = 17,user_id=user_id)
#
ids = neo4j_graph_db.retrieve_node_id_for_memory_type(topic="Document")
print(ids)

for id in ids:
print(id.get('memoryId'))

neo4j_graph_db.delete_memory_node(memory_id = id.get('memoryId'), topic="Document")
#
# neo4j_graph_db.delete_memory_node(memory_id=16, topic="PublicSerbianArchitecture")
# neo4j_graph_db.unlink_memory_from_user(memory_id = 17,user_id=user_id)
# cypher_query_public = neo4j_graph_db.create_document_node_cypher(document_summary, user_id, memory_type="PUBLIC")
# neo4j_graph_db.query(cypher_query_public)
# link_memory_to_user(user_id, session)

# neo4j_graph_db.create_memory_node(labels=['sr'])
# out = await get_vectordb_namespace(session, user_id)
# params = {
# "version": "1.0",
12
poetry.lock
generated
@@ -2121,6 +2121,16 @@ typing_extensions = "*"
[package.extras]
aws = ["boto3"]

[[package]]
name = "iso639"
version = "0.1.4"
description = "ISO639-2 support for Python."
optional = false
python-versions = "*"
files = [
    {file = "iso639-0.1.4.tar.gz", hash = "sha256:88b70cf6c64ee9c2c2972292818c8beb32db9ea6f4de1f8471a9b081a3d92e98"},
]

[[package]]
name = "itsdangerous"
version = "2.1.2"

@@ -6782,4 +6792,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "b32ccb9319a4411b11b4aa2fde21b354ea93488e4bd6057c62847baf47cf7db6"
content-hash = "c3402714a133f8583fcbf06955f155d42169ffd54874d565617b2c4cdf58a227"
@@ -59,6 +59,8 @@ graphviz = "^0.20.1"
greenlet = "^3.0.1"
neo4j = "^5.14.1"
grpcio = "^1.60.0"
langdetect = "^1.0.9"
iso639 = "^0.1.4"
|
|||
Loading…
Add table
Reference in a new issue