From 4c72ebf2c749391bc0ea83294c00ca7376bc9d22 Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Sat, 18 Nov 2023 12:57:11 +0100
Subject: [PATCH] User context enrichment mostly works now; still to do: add
 job state management, retrieve ids from the vector store for semantic
 search, and run the query against ChatGPT with the enriched context

---
 level_4/api.py                             |  77 ++--
 .../classifiers/classifier.py              |  16 +-
 .../database/graph_database/graph.py       |  31 +-
 .../database/postgres/models/user.py       |   5 +
 .../database/vectordb/chunkers/chunkers.py |  34 ++
 .../database/vectordb/loaders/loaders.py   |   3 +
 .../shared/chunk_strategy.py               |   1 +
 level_4/main.py                            | 408 +++++++++---------
 8 files changed, 291 insertions(+), 284 deletions(-)

diff --git a/level_4/api.py b/level_4/api.py
index e8c2dcbba..113a6da20 100644
--- a/level_4/api.py
+++ b/level_4/api.py
@@ -13,7 +13,7 @@ from cognitive_architecture.database.postgres.database import AsyncSessionLocal
 from cognitive_architecture.database.postgres.database_crud import session_scope
 from cognitive_architecture.vectorstore_manager import Memory
 from dotenv import load_dotenv
-
+from main import add_documents_to_graph_db
 from cognitive_architecture.config import Config

 # Set up logging
@@ -101,65 +101,42 @@ async def user_query_to_graph(payload: Payload):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

-@app.post("/user-to-document-summary")
-async def generate_document_summary(payload: Payload,):
-    try:
-        from database.graph_database.graph import Neo4jGraphDB
-        neo4j_graph_db = Neo4jGraphDB(config.graph_database_url, config.graph_database_username, config.graph_database_password)
-        decoded_payload = payload.payload
-        call_of_the_wild_summary = {
-            "user_id": decoded_payload["user_id"],
-            "document_category": "Classic Literature",
-            "title": "The Call of the Wild",
-            "summary": (
-                "'The Call of the Wild' is a novel by Jack London set in the Yukon during the 1890s Klondike "
-                "Gold Rush—a period when strong sled dogs were in high demand. The novel's central character "
-                "is a dog named Buck, a domesticated dog living at a ranch in the Santa Clara Valley of California "
-                "as the story opens. Stolen from his home and sold into the brutal existence of an Alaskan sled dog, "
-                "he reverts to atavistic traits. Buck is forced to adjust to, and survive, cruel treatments and fight "
-                "to dominate other dogs in a harsh climate. Eventually, he sheds the veneer of civilization, relying "
-                "on primordial instincts and lessons he learns, to emerge as a leader in the wild. London drew on his "
-                "own experiences in the Klondike, and the book provides a snapshot of the epical gold rush and the "
-                "harsh realities of life in the wilderness. The novel explores themes of morality versus instinct, "
-                "the struggle for survival in the natural world, and the intrusion of civilization on the wilderness. "
-                "As Buck's wild nature is awakened, he rises to become a respected and feared leader in the wild, "
-                "answering the primal call of nature."
-            )
-        }
-        cypher_query = neo4j_graph_db.create_document_node_cypher(call_of_the_wild_summary, decoded_payload['user_id'])
-        neo4j_graph_db.query(cypher_query, call_of_the_wild_summary)
+
+@app.post("/document_to_graph_db")
+async def document_to_graph_db(payload: Payload):
+    try:
+        decoded_payload = payload.payload
+        # Run the document-to-graph pipeline inside a managed session scope
         async with session_scope(session=AsyncSessionLocal()) as session:
-            result = await neo4j_graph_db.query(cypher_query, session)
-
+            result = await add_documents_to_graph_db(session, decoded_payload['user_id'], decoded_payload['settings'])
         return result
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

-@app.post("/user-document-vectordb")
-async def generate_document_to_vector_db(payload: Payload, ):
-    try:
-        from database.graph_database.graph import Neo4jGraphDB
-        neo4j_graph_db = Neo4jGraphDB(config.graph_database_url, config.graph_database_username,
-                                      config.graph_database_password)
-        decoded_payload = payload.payload
-
-
-        neo4j_graph_db.update_document_node_with_namespace(decoded_payload['user_id'], document_title="The Call of the Wild")
-
-        # Execute the query - replace this with the actual execution method
-        # async with session_scope(session=AsyncSessionLocal()) as session:
-        #     # Assuming you have a method in Neo4jGraphDB to execute the query
-        #     result = await neo4j_graph_db.query(cypher_query, session)
-        #
-        #     return result
-
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+# @app.post("/user-document-vectordb")
+# async def generate_document_to_vector_db(payload: Payload, ):
+#     try:
+#         from database.graph_database.graph import Neo4jGraphDB
+#         neo4j_graph_db = Neo4jGraphDB(config.graph_database_url, config.graph_database_username,
+#                                       config.graph_database_password)
+#         decoded_payload = payload.payload
+#
+#
+#         neo4j_graph_db.update_document_node_with_namespace(decoded_payload['user_id'], document_title="The Call of the Wild")
+#
+#         # Execute the query - replace this with the actual execution method
+#         # async with session_scope(session=AsyncSessionLocal()) as session:
+#         #     # Assuming you have a method in Neo4jGraphDB to execute the query
+#         #     result = await neo4j_graph_db.query(cypher_query, session)
+#         #
+#         #     return result
+#
+#     except Exception as e:
+#         raise HTTPException(status_code=500, detail=str(e))

diff --git a/level_4/cognitive_architecture/classifiers/classifier.py b/level_4/cognitive_architecture/classifiers/classifier.py
index 9d37c5b52..5b2624b83 100644
--- a/level_4/cognitive_architecture/classifiers/classifier.py
+++ b/level_4/cognitive_architecture/classifiers/classifier.py
@@ -1,3 +1,5 @@
+import logging
+
 from langchain.prompts import ChatPromptTemplate
 import json
@@ -21,11 +23,17 @@ from langchain.document_loaders import TextLoader
 from langchain.document_loaders import DirectoryLoader

-async def classify_documents(query, document_id):
+async def classify_documents(query: str, document_id: str, loader_settings: dict):
+    from ..database.vectordb.loaders.loaders import _document_loader
+
+    document_context = await _document_loader(query, loader_settings)
+    logging.info("This is the document context: %s", document_context)
     llm = ChatOpenAI(temperature=0, model=config.model)
     prompt_classify = ChatPromptTemplate.from_template(
-        """You are a summarizer and classifier. Determine what book this is and where does it belong in the output : {query}, Id: {d_id}"""
+        """You are a summarizer and classifier. Determine what book this is and where it belongs in the output: {query}, Id: {d_id}. Document context is: {context}"""
     )
     json_structure = [{
     "name": "summarizer",
@@ -54,7 +62,7 @@
     },
         "required": ["DocumentCategory", "Title", "Summary", "d_id"] }
     }]
     chain_filter = prompt_classify | llm.bind(function_call={"name": "summarizer"}, functions=json_structure)
-    classifier_output = await chain_filter.ainvoke({"query": query, "d_id": document_id})
+    classifier_output = await chain_filter.ainvoke({"query": query, "d_id": document_id, "context": str(document_context)})
     arguments_str = classifier_output.additional_kwargs['function_call']['arguments']
     print("This is the arguments string", arguments_str)
     arguments_dict = json.loads(arguments_str)
@@ -97,7 +105,7 @@ async def classify_call(query, context, document_types):
     arguments_str = classifier_output.additional_kwargs['function_call']['arguments']
     print("This is the arguments string", arguments_str)
     arguments_dict = json.loads(arguments_str)
-    classfier_value = arguments_dict.get('summarizer', None)
+    classfier_value = arguments_dict.get('DocumentCategory', None)
     print("This is the classifier value", classfier_value)

diff --git a/level_4/cognitive_architecture/database/graph_database/graph.py b/level_4/cognitive_architecture/database/graph_database/graph.py
index 516ca6439..3d27d6395 100644
--- a/level_4/cognitive_architecture/database/graph_database/graph.py
+++ b/level_4/cognitive_architecture/database/graph_database/graph.py
@@ -376,7 +376,7 @@ class Neo4jGraphDB(AbstractGraphDB):
             return f"An error occurred: {str(e)}"
     def retrieve_semantic_memory(self, user_id: str):
         query = f"""
-            MATCH (user:User {{userId: {user_id} }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)
+            MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)
             MATCH (semantic)-[:HAS_KNOWLEDGE]->(knowledge)
             RETURN knowledge
         """
@@ -449,12 +449,12 @@
         """
         try:
             query = f'''
-                MATCH (user:User {{userId: {user_id} }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
+                MATCH (user:User {{userId: '{user_id}' }})-[:HAS_SEMANTIC_MEMORY]->(semantic:SemanticMemory)-[:HAS_DOCUMENT]->(document:Document)
                 RETURN document.documentCategory AS category
             '''
-            result = self.query(query)
-            categories = [record["category"] for record in result]
-            return categories
+            logging.info(f"Generated Cypher query: {query}")
+            return query
+
         except Exception as e:
             logging.error(f"An error occurred while retrieving document categories: {str(e)}")
             return None
@@ -477,35 +477,38 @@
         # Validate the input parameters
         if not isinstance(document_summary, dict):
             raise ValueError("The document_summary must be a dictionary.")
-        if not all(key in document_summary for key in ['document_category', 'title', 'summary']):
+        if not all(key in document_summary for key in ['DocumentCategory', 'Title', 'Summary', 'd_id']):
            raise ValueError("The document_summary dictionary is missing required keys.")
         if not isinstance(user_id, str) or not user_id:
             raise ValueError("The user_id must be a non-empty string.")

         # Escape single quotes in the document summary data (if not using parameters)
-        # title = document_summary['title'].replace("'", "\\'")
-        # summary = document_summary['summary'].replace("'", "\\'")
-        # document_category = document_summary['document_category'].replace("'", "\\'")
+        title = document_summary['Title'].replace("'", "\\'")
+        summary = document_summary['Summary'].replace("'", "\\'")
+        document_category = document_summary['DocumentCategory'].replace("'", "\\'")
+        d_id = document_summary['d_id'].replace("'", "\\'")

        # Generate the Cypher query with the escaped values inlined
        cypher_query = f'''
        // Ensure the User node exists
-       MERGE (user:User {{ userId: $user_id }})
+       MERGE (user:User {{ userId: '{user_id}' }})

        // Ensure the SemanticMemory node exists and is connected to the User
-       MERGE (semantic:SemanticMemory {{ userId: $user_id }})
+       MERGE (semantic:SemanticMemory {{ userId: '{user_id}' }})
        MERGE (user)-[:HAS_SEMANTIC_MEMORY]->(semantic)

        // Create the Document node with its properties
        CREATE (document:Document {{
-           title: $title,
-           summary: $summary,
-           documentCategory: $document_category
+           title: '{title}',
+           summary: '{summary}',
+           documentCategory: '{document_category}',
+           d_id: '{d_id}'
        }})

        // Link the Document node to the SemanticMemory node
        CREATE (semantic)-[:HAS_DOCUMENT]->(document)
        '''
+       logging.info(f"Generated Cypher query: {cypher_query}")
        return cypher_query

diff --git a/level_4/cognitive_architecture/database/postgres/models/user.py b/level_4/cognitive_architecture/database/postgres/models/user.py
index b23beb8f5..7ae5c5eda 100644
--- a/level_4/cognitive_architecture/database/postgres/models/user.py
+++ b/level_4/cognitive_architecture/database/postgres/models/user.py
@@ -4,6 +4,11 @@ from sqlalchemy import Column, String, DateTime
 from sqlalchemy.orm import relationship
 import os
 import sys
+from .memory import MemoryModel
+from .operation import Operation
+from .sessions import Session
+from .metadatas import MetaDatas
+from .docs import DocsModel

 from ..database import Base

diff --git a/level_4/cognitive_architecture/database/vectordb/chunkers/chunkers.py b/level_4/cognitive_architecture/database/vectordb/chunkers/chunkers.py
index 1ec2525ac..cfa2c0a2a 100644
--- a/level_4/cognitive_architecture/database/vectordb/chunkers/chunkers.py
+++ b/level_4/cognitive_architecture/database/vectordb/chunkers/chunkers.py
@@ -15,8 +15,11 @@ def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_ove
         chunked_data = chunk_by_sentence(source_data, chunk_size, chunk_overlap)
     elif chunk_strategy == ChunkStrategy.EXACT:
         chunked_data = chunk_data_exact(source_data, chunk_size, chunk_overlap)
+    elif chunk_strategy == ChunkStrategy.SUMMARY:
+        chunked_data = summary_chunker(source_data, chunk_size, chunk_overlap)
     else:
         chunked_data = vanilla_chunker(source_data, chunk_size, chunk_overlap)
+
     return chunked_data
@@ -35,6 +38,37 @@ def vanilla_chunker(source_data, chunk_size=100, chunk_overlap=20):
     pages = text_splitter.create_documents(source_data.content)
     # pages = source_data.load_and_split()
     return pages
+
+def summary_chunker(source_data, chunk_size=400, chunk_overlap=20):
+    """
+    Chunk the given source data into smaller parts, returning the first five and last five chunks.
+
+    Parameters:
+    - source_data (str): The source data to be chunked.
+    - chunk_size (int): The size of each chunk.
+    - chunk_overlap (int): The overlap between consecutive chunks.
+
+    Returns:
+    - List: A list containing the first five and last five chunks of the chunked source data.
+    """
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+        length_function=len
+    )
+
+    try:
+        pages = text_splitter.create_documents([source_data])
+    except Exception:
+        # Fall back to object-style input that exposes .content
+        pages = text_splitter.create_documents(source_data.content)
+
+    # Return the first 5 and last 5 chunks
+    if len(pages) > 10:
+        return pages[:5] + pages[-5:]
+    else:
+        return pages  # Return all chunks if there are 10 or fewer
+
 def chunk_data_exact(data_chunks, chunk_size, chunk_overlap):
     data = "".join(data_chunks)
     chunks = []

diff --git a/level_4/cognitive_architecture/database/vectordb/loaders/loaders.py b/level_4/cognitive_architecture/database/vectordb/loaders/loaders.py
index 72e064980..a64efb18d 100644
--- a/level_4/cognitive_architecture/database/vectordb/loaders/loaders.py
+++ b/level_4/cognitive_architecture/database/vectordb/loaders/loaders.py
@@ -78,3 +78,6 @@ async def _document_loader( observation: str, loader_settings: dict):
+
+
+

diff --git a/level_4/cognitive_architecture/shared/chunk_strategy.py b/level_4/cognitive_architecture/shared/chunk_strategy.py
index c06f3278e..d291f18fa 100644
--- a/level_4/cognitive_architecture/shared/chunk_strategy.py
+++ b/level_4/cognitive_architecture/shared/chunk_strategy.py
@@ -5,3 +5,4 @@ class ChunkStrategy(Enum):
     PARAGRAPH = 'paragraph'
     SENTENCE = 'sentence'
     VANILLA = 'vanilla'
+    SUMMARY = 'summary'

diff --git a/level_4/main.py b/level_4/main.py
index 0f9bc2311..b4135c6a3 100644
--- a/level_4/main.py
+++ b/level_4/main.py
@@ -1,19 +1,15 @@
-# from marvin import ai_classifier
-# marvin.settings.openai.api_key = os.environ.get("OPENAI_API_KEY")
-from pydantic import BaseModel
+from pydantic import BaseModel
 from cognitive_architecture.database.graph_database.graph import Neo4jGraphDB
 from cognitive_architecture.database.postgres.models.memory import MemoryModel
 from cognitive_architecture.classifiers.classifier import classify_documents
 import os
 from dotenv import load_dotenv
-
 from cognitive_architecture.database.postgres.database_crud import session_scope
 from cognitive_architecture.database.postgres.database import AsyncSessionLocal
 from cognitive_architecture.utils import generate_letter_uuid
 import instructor
 from openai import OpenAI
-
 from cognitive_architecture.vectorstore_manager import Memory
 from cognitive_architecture.database.postgres.database_crud import fetch_job_id
 import uuid
@@ -21,11 +17,10 @@ from cognitive_architecture.database.postgres.models.sessions import Session
 from cognitive_architecture.database.postgres.models.operation import Operation
 from cognitive_architecture.database.postgres.database_crud import session_scope, add_entity, update_entity, fetch_job_id
 from cognitive_architecture.database.postgres.models.metadatas import MetaDatas
-
 from cognitive_architecture.database.postgres.models.docs import DocsModel
 from cognitive_architecture.database.postgres.models.memory import MemoryModel
 from level_4.cognitive_architecture.database.postgres.models.user import User
-
+from cognitive_architecture.classifiers.classifier import classify_call
 # Adds response_model to ChatCompletion
 # Allows the return of Pydantic model rather than raw JSON
 instructor.patch(OpenAI())
@@ -43,6 +38,7 @@
 config.load()
 print(config.model)
 print(config.openai_key)
 from cognitive_architecture.utils import get_document_names
+from sqlalchemy.orm import selectinload, joinedload, contains_eager
 import logging

@@ -71,23 +67,47 @@ async def get_vectordb_document_name(session: AsyncSession, user_id: str):
         logging.error(f"An error occurred while retrieving the Vectordb_namespace: {str(e)}")
         return None

-from sqlalchemy.orm import selectinload, joinedload
 async def get_vectordb_data(session: AsyncSession, user_id: str):
+    """
+    Asynchronously retrieves the latest memory names and document details for a given user.
+
+    This function executes a database query to fetch memory names and document details
+    associated with operations performed by a specific user. It leverages explicit joins
+    with the 'docs' and 'memories' tables and applies eager loading to optimize performance.
+
+    Parameters:
+    - session (AsyncSession): The database session for executing the query.
+    - user_id (str): The unique identifier of the user.
+
+    Returns:
+    - Tuple[List[str], List[Tuple[str, str]]]: A tuple containing a list of memory names and
+      a list of tuples with document names and their corresponding IDs.
+      Returns None if an exception occurs.
+
+    Raises:
+    - Exception: Propagates any exceptions that occur during query execution.
+
+    Example Usage:
+        memory_names, docs = await get_vectordb_data(session, "user123")
+    """
     try:
         result = await session.execute(
-            select(Operation, DocsModel.doc_name, DocsModel.id, MemoryModel.memory_name)
-            .join(DocsModel, Operation.docs)  # Correct join with docs table
-            .join(MemoryModel, Operation.memories)  # Correct join with memories table
+            select(Operation)
+            .join(Operation.docs)  # Explicit join with docs table
+            .join(Operation.memories)  # Explicit join with memories table
+            .options(
+                contains_eager(Operation.docs),  # Informs ORM of the join for docs
+                contains_eager(Operation.memories)  # Informs ORM of the join for memories
+            )
             .where(
                 (Operation.user_id == user_id)  # Filter by user_id
-                # (Operation.operation_status == 'SUCCESS')  # Optional additional filter
+                # Optionally, you can add more filters here
             )
             .order_by(Operation.created_at.desc())  # Order by creation date
         )
-        operations = result.scalars().all()
+        operations = result.unique().scalars().all()

         # Extract memory names and document names and IDs
         memory_names = [memory.memory_name for op in operations for memory in op.memories]
@@ -101,62 +121,20 @@ async def get_vectordb_data(session: AsyncSession, user_id: str):
         return None

-# async def retrieve_job_by_id(session, user_id, job_id):
-#     try:
-#         result = await session.execute(
-#             session.query(Session.id)
-#             .filter_by(user_id=user_id, id=job_id)
-#             .order_by(Session.created_at)
-#         )
-#         return result.scalar_one_or_none()
-#     except Exception as e:
-#         logging.error(f"An error occurred while retrieving the job: {str(e)}")
-#         return None
-#
-#
-#
-#
-# async def update_document_vectordb_namespace(postgres_session: AsyncSession, user_id: str, namespace: str = None, job_id:str=None):
-#     """
-#     Update the Document node with the Vectordb_namespace for the given user. If the namespace is not provided,
-#     it will be retrieved from the PostgreSQL database.
-#
-#     Args:
-#         postgres_session (AsyncSession): The async session for connecting to the PostgreSQL database.
-#         user_id (str): The user's unique identifier.
-#         namespace (str, optional): The Vectordb_namespace. If None, it will be retrieved from the database.
-#
-#     Returns:
-#         The result of the update operation or None if an error occurred.
-#     """
-#     vectordb_namespace = namespace
-#
-#     # Retrieve namespace from the database if not provided
-#     if vectordb_namespace is None:
-#         vectordb_namespace = await get_vectordb_namespace(postgres_session, user_id)
-#         if not vectordb_namespace:
-#             logging.error("Vectordb_namespace could not be retrieved.")
-#             return None
-#     from cognitive_architecture.database.graph_database.graph import Neo4jGraphDB
-#     # Example initialization (replace with your actual connection details)
-#     neo4j_graph_db = Neo4jGraphDB(url='bolt://localhost:7687', username='neo4j', password='pleaseletmein')
-#     results = []
-#     for namespace in vectordb_namespace:
-#         update_result = neo4j_graph_db.update_document_node_with_namespace(user_id, namespace)
-#         results.append(update_result)
-#     return results

 async def load_documents_to_vectorstore(session: AsyncSession, user_id: str, job_id: str = None, loader_settings: dict = None):
     namespace_id = str(generate_letter_uuid()) + "_" + "SEMANTICMEMORY"
     namespace_class = namespace_id + "_class"
-    try:
-        new_user = User(id=user_id)
-        await add_entity(session, new_user)
-    except:
-        pass
+    logging.info("Namespace created with id %s", namespace_id)
+
+    # try:
+    #     new_user = User(id=user_id)
+    #     await add_entity(session, new_user)
+    # except:
+    #     pass
+
+    new_user = User(id=user_id)
+    await add_entity(session, new_user)

     if job_id is None:
         job_id = str(uuid.uuid4())
@@ -172,10 +150,6 @@
     )
     memory = await Memory.create_memory(user_id, session, namespace=namespace_id, job_id=job_id, memory_label=namespace_id)
-
-
-
-
     document_names = get_document_names(loader_settings.get("path", "None"))
     for doc in document_names:
         await add_entity(
@@ -258,22 +232,67 @@ async def user_query_to_graph_db(session: AsyncSession, user_id: str, query_inpu

-async def add_documents_to_graph_db(postgres_session: AsyncSession, user_id: str):
+async def add_documents_to_graph_db(postgres_session: AsyncSession, user_id: str, loader_settings: dict = None, stupid_local_testing_flag=False):  # clean this up Vasilije, don't be sloppy
     """Classify stored documents and register them in the graph database."""
     try:
         # await update_document_vectordb_namespace(postgres_session, user_id)
         memory_names, docs = await get_vectordb_data(postgres_session, user_id)
+        logging.info("Memory names are %s", memory_names)
+        logging.info("Docs are %s", docs)
         for doc, memory_name in zip(docs, memory_names):
             doc_name, doc_id = doc
-            classification = await classify_documents(doc_name, document_id=doc_id)
-            neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
-                                          password=config.graph_database_password)
-            rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
-            neo4j_graph_db.query(rs, classification)
+            logging.info("Processing document %s", doc_name)
+            if stupid_local_testing_flag:
+                classifications = [{
+                    "DocumentCategory": "Literature",
+                    "Title": "Bartleby, the Scrivener",
+                    "Summary": "The document is a narrative about an enigmatic copyist named Bartleby who works in a law office. Despite initially being a diligent employee, Bartleby begins to refuse tasks with the phrase 'I would prefer not to' and eventually stops working altogether. His passive resistance and mysterious behavior confound the narrator, who is also his employer. Bartleby's refusal to leave the office leads to various complications, and he is eventually taken to the Tombs as a vagrant. The story ends with Bartleby's death and the revelation that he may have previously worked in the Dead Letter Office, which adds a layer of poignancy to his character.",
+                    "d_id": "2a5c571f-bad6-4649-a4ac-36e4bb4f34cd"
+                },
+                {
+                    "DocumentCategory": "Science",
+                    "Title": "The Mysterious World of Quantum Mechanics",
+                    "Summary": "This article delves into the fundamentals of quantum mechanics, exploring its paradoxical nature where particles can exist in multiple states simultaneously. It discusses key experiments and theories that have shaped our understanding of the quantum world, such as the double-slit experiment, Schrödinger's cat, and quantum entanglement. The piece also touches upon the implications of quantum mechanics for future technology, including quantum computing and cryptography.",
+                    "d_id": "f4e2c3b1-4567-8910-11a2-b3c4d5e6f7g8"
+                },
+                {
+                    "DocumentCategory": "History",
+                    "Title": "The Rise and Fall of the Roman Empire",
+                    "Summary": "This essay provides an overview of the Roman Empire's history, from its foundation to its eventual decline. It examines the political, social, and economic factors that contributed to the empire's expansion and success, as well as those that led to its downfall. Key events and figures such as Julius Caesar, the Punic Wars, and the transition from republic to empire are discussed. The essay concludes with an analysis of the empire's lasting impact on Western civilization.",
+                    "d_id": "8h7g6f5e-4d3c-2b1a-09e8-d7c6b5a4f3e2"
+                },
+                {
+                    "DocumentCategory": "Technology",
+                    "Title": "The Future of Artificial Intelligence",
+                    "Summary": "This report explores the current state and future prospects of artificial intelligence (AI). It covers the evolution of AI from simple algorithms to advanced neural networks capable of deep learning. The document discusses various applications of AI in industries such as healthcare, finance, and transportation, as well as ethical considerations and potential risks associated with AI development. Predictions for future advancements and their societal impact are also presented.",
+                    "d_id": "3c2b1a09-d8e7-f6g5-h4i3-j1k2l3m4n5o6"
+                },
+                {
+                    "DocumentCategory": "Economics",
+                    "Title": "Global Economic Trends and Predictions",
+                    "Summary": "This analysis examines major trends in the global economy, including the rise of emerging markets, the impact of technology on job markets, and shifts in international trade. It delves into the economic effects of recent global events, such as pandemics and geopolitical conflicts, and discusses how these might shape future economic policies and practices. The document provides predictions for economic growth, inflation rates, and currency fluctuations in the coming years.",
+                    "d_id": "7k6j5h4g-3f2e-1d0c-b8a9-m7n6o5p4q3r2"
+                }
+                ]
+                for classification in classifications:
-            # select doc from the store
-            neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name, document_id=doc_id)
+                    neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
+                                                  password=config.graph_database_password)
+                    rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
+                    neo4j_graph_db.query(rs, classification)
+                    # select doc from the store
+                    neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name, document_id=doc_id)
+            else:
+                classification = await classify_documents(doc_name, document_id=doc_id, loader_settings=loader_settings)
+                neo4j_graph_db = Neo4jGraphDB(url=config.graph_database_url, username=config.graph_database_username,
+                                              password=config.graph_database_password)
+                rs = neo4j_graph_db.create_document_node_cypher(classification, user_id)
+                neo4j_graph_db.query(rs, classification)
+
+                # select doc from the store
+                neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=memory_name,
+                                                                   document_id=doc_id)
    except Exception as e:
        logging.error("Adding documents to the graph db failed: %s", str(e))

@@ -293,64 +312,104 @@ async def user_context_enrichment(session, user_id, query):
         The original user query: {query}
     """
-    classifier_prompt = """ Your task is to determine if any of the following document categories are relevant to the user query and context: {query} Context is: {context}
-    Document categories: {document_categories}"""
-    from cognitive_architecture.classifiers.classifier import classify_call
+    logging.info("Context from graphdb is %s", context)
+    document_categories_query = await neo4j_graph_db.get_document_categories(user_id=user_id)
+    result = neo4j_graph_db.query(document_categories_query)
+    categories = [record["category"] for record in result]
+    logging.info("Possible document categories are %s", str(categories))
+    relevant_categories = await classify_call(query=query, context=context, document_types=str(categories))
+    logging.info("Relevant categories after the classifier are %s", relevant_categories)

+    # memory = await Memory.create_memory(user_id, session, namespace=namespace_id, job_id=job_id,
+    #                                     memory_label=namespace_id)
+    #
+    # existing_user = await Memory.check_existing_user(user_id, session)
+    # print("here is the existing user", existing_user)
+    # await memory.manage_memory_attributes(existing_user)
+    #
+    # print("Namespace id is %s", namespace_id)
+    # await memory.add_dynamic_memory_class(namespace_id.lower(), namespace_id)
+    #
+    # dynamic_memory_class = getattr(memory, namespace_class.lower(), None)
+    #
+    # methods_to_add = ["add_memories", "fetch_memories", "delete_memories"]
+    #
+    # if dynamic_memory_class is not None:
+    #     for method_name in methods_to_add:
+    #         await memory.add_method_to_class(dynamic_memory_class, method_name)
+    #         print(f"Memory method {method_name} has been added")
+    # else:
+    #     print(f"No attribute named in memory.")
+    #
+    # print("Available memory classes:", await memory.list_memory_classes())
+    # result = await memory.dynamic_method_call(dynamic_memory_class, 'fetch_memories',
+    #                                           observation='some_observation')

    # fetch_namespace_from_graph = neo4j_graph_db.get_namespaces_by_document_category(user_id=user_id, category=relevant_categories)
    #
    # results = []
    #
    # for namespace in fetch_namespace_from_graph:
    #     memory = await Memory.create_memory(user_id, session, namespace=namespace)
    #
    #     # Managing memory attributes
    #     existing_user = await Memory.check_existing_user(user_id, session)
    #     print("here is the existing user", existing_user)
    #     await memory.manage_memory_attributes(existing_user)
    #     namespace_class = namespace
    #     memory = await Memory.create_memory(user_id, session, namespace=namespace_class)
    #
    #     # Managing memory attributes
    #     existing_user = await Memory.check_existing_user(user_id, session)
    #     print("here is the existing user", existing_user)
    #     await memory.manage_memory_attributes(existing_user)
    #
    #
    #     dynamic_memory_class = getattr(memory, namespace_class.lower(), None)
    #
    #     await memory.add_dynamic_memory_class(dynamic_memory_class, namespace_class)
    #     await memory.add_method_to_class(dynamic_memory_class, "fetch_memories")
    #     raw_document_output = await memory.dynamic_method_call(
    #         memory.semanticmemory_class,
    #         "fetch_memories",
    #         observation=context,
    #     )
    #     from openai import OpenAI
    #     import instructor
    #
    #     # Enables `response_model`
    #     client = instructor.patch(OpenAI())
    #
    #     format_query_via_gpt = f""" Provide an answer to the user query: {query} Context is: {context}, Document store information is {raw_document_output} """
    #
    #     class UserResponse(BaseModel):
    #         response: str
    #
    #
    #     user = client.chat.completions.create(
    #         model=config.model,
    #         response_model=UserResponse,
    #         messages=[
    #             {"role": "user", "content": format_query_via_gpt},
    #         ]
    #     )
    #
    #     results.append(user.response)
    #
    #     return results

-    document_categories = neo4j_graph_db.get_document_categories(user_id=user_id)
-    relevant_categories = await classify_call( query, context, document_types=document_categories)
-    fetch_namespace_from_graph = neo4j_graph_db.get_namespaces_by_document_category(user_id=user_id, category=relevant_categories)
-
-    results = []
-
-    for namespace in fetch_namespace_from_graph:
-        memory = await Memory.create_memory(user_id, session, namespace=namespace)
-
-        # Managing memory attributes
-        existing_user = await Memory.check_existing_user(user_id, session)
-        print("here is the existing user", existing_user)
-        await memory.manage_memory_attributes(existing_user)
-        namespace_class = namespace
-        memory = await Memory.create_memory(user_id, session, namespace=namespace_class)
-
-        # Managing memory attributes
-        existing_user = await Memory.check_existing_user(user_id, session)
-        print("here is the existing user", existing_user)
-        await memory.manage_memory_attributes(existing_user)
-        dynamic_memory_class = getattr(memory, namespace_class.lower(), None)
-
-        await memory.add_dynamic_memory_class(dynamic_memory_class, namespace_class)
-        await memory.add_method_to_class(dynamic_memory_class, "fetch_memories")
-        raw_document_output = await memory.dynamic_method_call(
-            memory.semanticmemory_class,
-            "fetch_memories",
-            observation=context,
-        )
-        from openai import OpenAI
-        import instructor
-
-        # Enables `response_model`
-        client = instructor.patch(OpenAI())
-
-        format_query_via_gpt = f""" Provide an answer to the user query: {query} Context is: {context}, Document store information is {raw_document_output} """
-
-        class UserResponse(BaseModel):
-            response: str
-        user = client.chat.completions.create(
-            model=config.model,
-            response_model=UserResponse,
-            messages=[
-                {"role": "user", "content": format_query_via_gpt},
-            ]
-        )
-        results.append(user.response)
-    return results

# query_input = "I walked in the forest yesterday and added to my list I need to buy some milk in the store"
#

@@ -416,92 +475,6 @@ async def main():
     async with session_scope(AsyncSessionLocal()) as session:
         # out = await get_vectordb_namespace(session, user_id)
-
-
-
-
-
-
-        # print(out)
-        # job_id = ""
-        # job_id = await fetch_job_id(session, user_id=user_id, job_id=job_id)
-        # if job_id is None:
-        #     job_id = str(uuid.uuid4())
-        #
-        # await add_entity(
-        #     session,
-        #     Operation(
-        #         id=job_id,
-        #         user_id=user_id,
-        #         operation_params="",
-        #         number_of_files=2,
-        #         operation_status = "RUNNING",
-        #         operation_type="",
-        #         test_set_id="",
-        #     ),
-        # )
-        # await update_document_vectordb_namespace(session, user_id)
-        # from cognitive_architecture.graph_database.graph import Neo4jGraphDB
-        # # Example initialization (replace with your actual connection details)
-        # neo4j_graph_db = Neo4jGraphDB(url='bolt://localhost:7687', username='neo4j', password='pleaseletmein')
-        # # Generate the Cypher query for a specific user
-        # user_id = 'user123'  # Replace with the actual user ID
-        #cypher_query = await neo4j_graph_db.generate_cypher_query_for_user_prompt_decomposition(user_id,"I walked in the forest yesterday and added to my list I need to buy some milk in the store")
-        # result = neo4j_graph_db.query(cypher_query)
-        call_of_the_wild_summary = {
-            "user_id": user_id,
-            "document_category": "Classic Literature",
-            "title": "The Call of the Wild",
-            "summary": (
-                "'The Call of the Wild' is a novel by Jack London set in the Yukon during the 1890s Klondike "
-                "Gold Rush—a period when strong sled dogs were in high demand. The novel's central character "
-                "is a dog named Buck, a domesticated dog living at a ranch in the Santa Clara Valley of California "
-                "as the story opens. Stolen from his home and sold into the brutal existence of an Alaskan sled dog, "
-                "he reverts to atavistic traits. Buck is forced to adjust to, and survive, cruel treatments and fight "
-                "to dominate other dogs in a harsh climate. Eventually, he sheds the veneer of civilization, relying "
-                "on primordial instincts and lessons he learns, to emerge as a leader in the wild. London drew on his "
-                "own experiences in the Klondike, and the book provides a snapshot of the epical gold rush and the "
-                "harsh realities of life in the wilderness. The novel explores themes of morality versus instinct, "
-                "the struggle for survival in the natural world, and the intrusion of civilization on the wilderness. "
-                "As Buck's wild nature is awakened, he rises to become a respected and feared leader in the wild, "
-                "answering the primal call of nature."
-            )
-        }
-        # rs = neo4j_graph_db.create_document_node_cypher(call_of_the_wild_summary, user_id)
-        #
-        # neo4j_graph_db.query(rs, call_of_the_wild_summary)
-        # print(cypher_query)
-        # from cognitive_architecture.classifiers.classifier import classify_documents
-        #
-        # ff = await classify_documents("Lord of the Rings")
-        #
-        # print(ff)
-        # vector_db_namespaces = await get_vectordb_namespace(session, user_id)
-        #
-        # if vector_db_namespaces == []:
-        #     vector_db_namespaces = ["None"]
-        #
-        #
-        # print(vector_db_namespaces)
-        # for value in vector_db_namespaces:
-        #     print(value)
-        #
-        #     oo = neo4j_graph_db.update_document_node_with_namespace(user_id, vectordb_namespace=value, document_title="The Call of the Wild")
-        #     logging.info("gg", oo)
-        #     neo4j_graph_db.query(oo)
-        # await update_document_vectordb_namespace(session, user_id)
-        # # Execute the generated Cypher query
-        # result = neo4j_graph_db.query(cypher_query)

        params = {
            "version": "1.0",
            "agreement_id": "AG123456",
            "privacy_policy": "https://example.com/privacy",
            "terms_of_service": "https://example.com/terms",
            "format": "json",
            "schema_version": "1.1",
            "checksum": "a1b2c3d4e5f6",
            "owner": "John Doe",
            "license": "MIT",
            "validity_start": "2023-08-01",
            "validity_end": "2024-07-31",
        }
        loader_settings = {
            "format": "PDF",
            "source": "DEVICE",
            "path": [".data"],
+           "strategy": "SUMMARY",
        }
        # await load_documents_to_vectorstore(session, user_id, loader_settings=loader_settings)
-        await user_query_to_graph_db(session, user_id, "I walked in the forest yesterday and added to my list I need to buy some milk in the store and get a summary from a classical book i read yesterday")
-        # await add_documents_to_graph_db(session, user_id)
+        # await user_query_to_graph_db(session, user_id, "I walked in the forest yesterday and added to my list I need to buy some milk in the store and get a summary from a classical book i read yesterday")
+        # await add_documents_to_graph_db(session, user_id, loader_settings=loader_settings)
+
+        ee = await user_context_enrichment(session, user_id, query="I walked in the forest yesterday and added to my list I need to buy some milk in the store and i am curious about a book i read yesterday about Bartleby, the Scrivener")
        # memory_instance = Memory(namespace='SEMANTICMEMORY')
        # sss = await memory_instance.dynamic_method_call(memory_instance.semantic_memory_class, 'fetch_memories', observation='some_observation')
        # from cognitive_architecture.vectorstore_manager import Memory
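
Note for reviewers: the commented-out tail of user_context_enrichment is the part the subject line still promises (fetch the relevant namespaces, run semantic search over the vector store, and answer via ChatGPT with the enriched context). A minimal cleaned-up sketch of that flow follows; it is not part of the patch, and it assumes the commented helpers (get_namespaces_by_document_category, Memory.create_memory, dynamic_method_call) keep the signatures shown above, with Memory and config imported as at the top of main.py.

# Sketch only: reconstructs the commented-out enrichment tail under the
# assumption that the helpers keep the signatures shown in the block above.
import instructor
from openai import OpenAI
from pydantic import BaseModel


class UserResponse(BaseModel):
    response: str


async def answer_with_enriched_context(session, user_id, query, context, relevant_categories, neo4j_graph_db):
    client = instructor.patch(OpenAI())  # enables `response_model`
    namespaces = neo4j_graph_db.get_namespaces_by_document_category(user_id=user_id, category=relevant_categories)
    results = []
    for namespace in namespaces:
        # Create the memory object and attach the dynamic per-namespace class
        memory = await Memory.create_memory(user_id, session, namespace=namespace)
        existing_user = await Memory.check_existing_user(user_id, session)
        await memory.manage_memory_attributes(existing_user)
        dynamic_memory_class = getattr(memory, namespace.lower(), None)
        await memory.add_dynamic_memory_class(dynamic_memory_class, namespace)
        await memory.add_method_to_class(dynamic_memory_class, "fetch_memories")
        # Semantic search over the vector store for this namespace
        raw_document_output = await memory.dynamic_method_call(
            dynamic_memory_class, "fetch_memories", observation=context
        )
        # Answer the user query with the enriched context
        prompt = f"Provide an answer to the user query: {query} Context is: {context}, Document store information is {raw_document_output}"
        answer = client.chat.completions.create(
            model=config.model,
            response_model=UserResponse,
            messages=[{"role": "user", "content": prompt}],
        )
        results.append(answer.response)
    return results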
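
For local testing, the new /document_to_graph_db endpoint added in api.py can be exercised with a request like the sketch below. The host, port, and user id are placeholder assumptions, and the settings mirror the loader_settings used in main():

# Sketch only: host/port and user id are assumptions for a local dev server.
import requests

body = {
    "payload": {
        "user_id": "user123",
        "settings": {
            "format": "PDF",
            "source": "DEVICE",
            "path": [".data"],
            "strategy": "SUMMARY",
        },
    }
}
resp = requests.post("http://localhost:8000/document_to_graph_db", json=body)
print(resp.status_code, resp.json())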