From 2bb1da1487ffad564211f497fc7f1ab7d473dbde Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Sat, 17 Feb 2024 10:54:30 +0100
Subject: [PATCH] SQLite works, made fixes in config so it becomes a basis,
 added a few mods on top

---
 api.py                                        |   2 +-
 .../classifiers/classifier.py                 | 182 ------------------
 .../classifiers/classify_documents.py         |  64 ++++++
 .../classifiers/classify_summary.py           |  62 ++++++
 .../classifiers/classify_user_input.py        |  55 ++++++
 .../classifiers/classify_user_query.py        |  56 ++++++
 .../database/create_database.py               |  62 ------
 .../database/database_manager.py              |  31 +++
 .../database/relationaldb/database.py         |   2 -
 main.py                                       |  10 +-
 10 files changed, 274 insertions(+), 252 deletions(-)
 delete mode 100644 cognitive_architecture/classifiers/classifier.py
 create mode 100644 cognitive_architecture/classifiers/classify_documents.py
 create mode 100644 cognitive_architecture/classifiers/classify_summary.py
 create mode 100644 cognitive_architecture/classifiers/classify_user_input.py
 create mode 100644 cognitive_architecture/classifiers/classify_user_query.py
 create mode 100644 cognitive_architecture/database/database_manager.py

diff --git a/api.py b/api.py
index e85422a7a..55f949c96 100644
--- a/api.py
+++ b/api.py
@@ -218,7 +218,7 @@ async def user_query_classfier(payload: Payload):
 
         # Execute the query - replace this with the actual execution method
         async with session_scope(session=AsyncSessionLocal()) as session:
-            from cognitive_architecture.classifiers.classifier import (
+            from cognitive_architecture.classifiers.classify_user_query import (
                 classify_user_query,
             )
 
diff --git a/cognitive_architecture/classifiers/classifier.py b/cognitive_architecture/classifiers/classifier.py
deleted file mode 100644
index 0bf43d3ee..000000000
--- a/cognitive_architecture/classifiers/classifier.py
+++ /dev/null
@@ -1,182 +0,0 @@
-import logging
-
-from langchain.prompts import ChatPromptTemplate
-import json
-
-# TO DO, ADD ALL CLASSIFIERS HERE
-
-
-from langchain.chains import create_extraction_chain
-from langchain.chat_models import ChatOpenAI
-
-from ..config import Config
-from ..database.vectordb.loaders.loaders import _document_loader
-
-config = Config()
-config.load()
-OPENAI_API_KEY = config.openai_key
-from langchain.document_loaders import TextLoader
-from langchain.document_loaders import DirectoryLoader
-
-
-async def classify_documents(query: str, document_id: str, content: str):
-    document_context = content
-    logging.info("This is the document context", document_context)
-
-    llm = ChatOpenAI(temperature=0, model=config.model)
-    prompt_classify = ChatPromptTemplate.from_template(
-        """You are a summarizer and classifier. Determine what book this is and where does it belong in the output : {query}, Id: {d_id} Document context is: {context}"""
-    )
-    json_structure = [
-        {
-            "name": "summarizer",
-            "description": "Summarization and classification",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "DocumentCategory": {
-                        "type": "string",
-                        "description": "The classification of documents in groups such as legal, medical, etc.",
-                    },
-                    "Title": {
-                        "type": "string",
-                        "description": "The title of the document",
-                    },
-                    "Summary": {
-                        "type": "string",
-                        "description": "The summary of the document",
-                    },
-                    "d_id": {"type": "string", "description": "The id of the document"},
-                },
-                "required": ["DocumentCategory", "Title", "Summary", "d_id"],
-            },
-        }
-    ]
-    chain_filter = prompt_classify | llm.bind(
-        function_call={"name": "summarizer"}, functions=json_structure
-    )
-    classifier_output = await chain_filter.ainvoke(
-        {"query": query, "d_id": document_id, "context": str(document_context)}
-    )
-    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
-    print("This is the arguments string", arguments_str)
-    arguments_dict = json.loads(arguments_str)
-    return arguments_dict
-
-
-# classify retrievals according to type of retrieval
-def classify_retrieval():
-    pass
-
-
-async def classify_user_input(query, input_type):
-    llm = ChatOpenAI(temperature=0, model=config.model)
-    prompt_classify = ChatPromptTemplate.from_template(
-        """You are a classifier. Determine with a True or False if the following input: {query}, is relevant for the following memory category: {input_type}"""
-    )
-    json_structure = [
-        {
-            "name": "classifier",
-            "description": "Classification",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "InputClassification": {
-                        "type": "boolean",
-                        "description": "The classification of the input",
-                    }
-                },
-                "required": ["InputClassification"],
-            },
-        }
-    ]
-    chain_filter = prompt_classify | llm.bind(
-        function_call={"name": "classifier"}, functions=json_structure
-    )
-    classifier_output = await chain_filter.ainvoke(
-        {"query": query, "input_type": input_type}
-    )
-    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
-    logging.info("This is the arguments string %s", arguments_str)
-    arguments_dict = json.loads(arguments_str)
-    logging.info("Relevant summary is %s", arguments_dict.get("DocumentSummary", None))
-    InputClassification = arguments_dict.get("InputClassification", None)
-    logging.info("This is the classification %s", InputClassification)
-    return InputClassification
-
-
-# classify documents according to type of document
-async def classify_call(query, document_summaries):
-    llm = ChatOpenAI(temperature=0, model=config.model)
-    prompt_classify = ChatPromptTemplate.from_template(
-        """You are a classifier. Determine what document are relevant for the given query: {query}, Document summaries and ids:{document_summaries}"""
-    )
-    json_structure = [
-        {
-            "name": "classifier",
-            "description": "Classification",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "DocumentSummary": {
-                        "type": "string",
-                        "description": "The summary of the document and the topic it deals with.",
-                    },
-                    "d_id": {"type": "string", "description": "The id of the document"},
-                },
-                "required": ["DocumentSummary"],
-            },
-        }
-    ]
-    chain_filter = prompt_classify | llm.bind(
-        function_call={"name": "classifier"}, functions=json_structure
-    )
-    classifier_output = await chain_filter.ainvoke(
-        {"query": query, "document_summaries": document_summaries}
-    )
-    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
-    print("This is the arguments string", arguments_str)
-    arguments_dict = json.loads(arguments_str)
-    logging.info("Relevant summary is %s", arguments_dict.get("DocumentSummary", None))
-    classfier_id = arguments_dict.get("d_id", None)
-
-    print("This is the classifier id ", classfier_id)
-
-    return classfier_id
-
-
-async def classify_user_query(query, context, document_types):
-    llm = ChatOpenAI(temperature=0, model=config.model)
-    prompt_classify = ChatPromptTemplate.from_template(
-        """You are a classifier. You store user memories, thoughts and feelings. Determine if you need to use them to answer this query : {query}"""
-    )
-    json_structure = [
-        {
-            "name": "classifier",
-            "description": "Classification",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "UserQueryClassifier": {
-                        "type": "bool",
-                        "description": "The classification of documents in groups such as legal, medical, etc.",
-                    }
-                },
-                "required": ["UserQueryClassiffier"],
-            },
-        }
-    ]
-    chain_filter = prompt_classify | llm.bind(
-        function_call={"name": "classifier"}, functions=json_structure
-    )
-    classifier_output = await chain_filter.ainvoke(
-        {"query": query, "context": context, "document_types": document_types}
-    )
-    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
-    print("This is the arguments string", arguments_str)
-    arguments_dict = json.loads(arguments_str)
-    classfier_value = arguments_dict.get("UserQueryClassifier", None)
-
-    print("This is the classifier value", classfier_value)
-
-    return classfier_value
diff --git a/cognitive_architecture/classifiers/classify_documents.py b/cognitive_architecture/classifiers/classify_documents.py
new file mode 100644
index 000000000..07aff6d76
--- /dev/null
+++ b/cognitive_architecture/classifiers/classify_documents.py
@@ -0,0 +1,64 @@
+import logging
+
+from langchain.prompts import ChatPromptTemplate
+import json
+
+# TO DO, ADD ALL CLASSIFIERS HERE
+
+
+from langchain.chains import create_extraction_chain
+from langchain.chat_models import ChatOpenAI
+
+from ..config import Config
+from ..database.vectordb.loaders.loaders import _document_loader
+
+config = Config()
+config.load()
+OPENAI_API_KEY = config.openai_key
+from langchain.document_loaders import TextLoader
+from langchain.document_loaders import DirectoryLoader
+
+
+async def classify_documents(query: str, document_id: str, content: str):
+    document_context = content
+    logging.info("This is the document context %s", document_context)
+
+    llm = ChatOpenAI(temperature=0, model=config.model)
+    prompt_classify = ChatPromptTemplate.from_template(
+        """You are a summarizer and classifier. Determine what book this is and where it belongs in the output: {query}, Id: {d_id}. Document context is: {context}"""
+    )
+    json_structure = [
+        {
+            "name": "summarizer",
+            "description": "Summarization and classification",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "DocumentCategory": {
+                        "type": "string",
+                        "description": "The classification of documents in groups such as legal, medical, etc.",
+                    },
+                    "Title": {
+                        "type": "string",
+                        "description": "The title of the document",
+                    },
+                    "Summary": {
+                        "type": "string",
+                        "description": "The summary of the document",
+                    },
+                    "d_id": {"type": "string", "description": "The id of the document"},
+                },
+                "required": ["DocumentCategory", "Title", "Summary", "d_id"],
+            },
+        }
+    ]
+    chain_filter = prompt_classify | llm.bind(
+        function_call={"name": "summarizer"}, functions=json_structure
+    )
+    classifier_output = await chain_filter.ainvoke(
+        {"query": query, "d_id": document_id, "context": str(document_context)}
+    )
+    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
+    print("This is the arguments string", arguments_str)
+    arguments_dict = json.loads(arguments_str)
+    return arguments_dict
\ No newline at end of file
diff --git a/cognitive_architecture/classifiers/classify_summary.py b/cognitive_architecture/classifiers/classify_summary.py
new file mode 100644
index 000000000..45dfe0c32
--- /dev/null
+++ b/cognitive_architecture/classifiers/classify_summary.py
@@ -0,0 +1,62 @@
+import logging
+
+from langchain.prompts import ChatPromptTemplate
+import json
+
+# TO DO, ADD ALL CLASSIFIERS HERE
+
+
+from langchain.chains import create_extraction_chain
+from langchain.chat_models import ChatOpenAI
+
+from ..config import Config
+from ..database.vectordb.loaders.loaders import _document_loader
+
+config = Config()
+config.load()
+OPENAI_API_KEY = config.openai_key
+from langchain.document_loaders import TextLoader
+from langchain.document_loaders import DirectoryLoader
+
+
+
+
+
+
+async def classify_summary(query, document_summaries):
+    llm = ChatOpenAI(temperature=0, model=config.model)
+    prompt_classify = ChatPromptTemplate.from_template(
+        """You are a classifier. Determine which documents are relevant for the given query: {query}. Document summaries and ids: {document_summaries}"""
+    )
+    json_structure = [
+        {
+            "name": "classifier",
+            "description": "Classification",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "DocumentSummary": {
+                        "type": "string",
+                        "description": "The summary of the document and the topic it deals with.",
+                    },
+                    "d_id": {"type": "string", "description": "The id of the document"},
+                },
+                "required": ["DocumentSummary"],
+            },
+        }
+    ]
+    chain_filter = prompt_classify | llm.bind(
+        function_call={"name": "classifier"}, functions=json_structure
+    )
+    classifier_output = await chain_filter.ainvoke(
+        {"query": query, "document_summaries": document_summaries}
+    )
+    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
+    print("This is the arguments string", arguments_str)
+    arguments_dict = json.loads(arguments_str)
+    logging.info("Relevant summary is %s", arguments_dict.get("DocumentSummary", None))
+    classifier_id = arguments_dict.get("d_id", None)
+
+    print("This is the classifier id ", classifier_id)
+
+    return classifier_id
\ No newline at end of file
diff --git a/cognitive_architecture/classifiers/classify_user_input.py b/cognitive_architecture/classifiers/classify_user_input.py
new file mode 100644
index 000000000..840039ee4
--- /dev/null
+++ b/cognitive_architecture/classifiers/classify_user_input.py
@@ -0,0 +1,55 @@
+import logging
+
+from langchain.prompts import ChatPromptTemplate
+import json
+
+# TO DO, ADD ALL CLASSIFIERS HERE
+
+
+from langchain.chains import create_extraction_chain
+from langchain.chat_models import ChatOpenAI
+
+from ..config import Config
+from ..database.vectordb.loaders.loaders import _document_loader
+
+config = Config()
+config.load()
+OPENAI_API_KEY = config.openai_key
+from langchain.document_loaders import TextLoader
+from langchain.document_loaders import DirectoryLoader
+
+
+async def classify_user_input(query, input_type):
+    llm = ChatOpenAI(temperature=0, model=config.model)
+    prompt_classify = ChatPromptTemplate.from_template(
+        """You are a classifier. Determine with True or False whether the following input: {query} is relevant for the following memory category: {input_type}"""
+    )
+    json_structure = [
+        {
+            "name": "classifier",
+            "description": "Classification",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "InputClassification": {
+                        "type": "boolean",
+                        "description": "The classification of the input",
+                    }
+                },
+                "required": ["InputClassification"],
+            },
+        }
+    ]
+    chain_filter = prompt_classify | llm.bind(
+        function_call={"name": "classifier"}, functions=json_structure
+    )
+    classifier_output = await chain_filter.ainvoke(
+        {"query": query, "input_type": input_type}
+    )
+    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
+    logging.info("This is the arguments string %s", arguments_str)
+    arguments_dict = json.loads(arguments_str)
+    # Extract the boolean classification from the function-call arguments
+    InputClassification = arguments_dict.get("InputClassification", None)
+    logging.info("This is the classification %s", InputClassification)
+    return InputClassification
\ No newline at end of file
diff --git a/cognitive_architecture/classifiers/classify_user_query.py b/cognitive_architecture/classifiers/classify_user_query.py
new file mode 100644
index 000000000..1f4210028
--- /dev/null
+++ b/cognitive_architecture/classifiers/classify_user_query.py
@@ -0,0 +1,56 @@
+import logging
+
+from langchain.prompts import ChatPromptTemplate
+import json
+
+# TO DO, ADD ALL CLASSIFIERS HERE
+
+
+from langchain.chains import create_extraction_chain
+from langchain.chat_models import ChatOpenAI
+
+from ..config import Config
+from ..database.vectordb.loaders.loaders import _document_loader
+
+config = Config()
+config.load()
+OPENAI_API_KEY = config.openai_key
+from langchain.document_loaders import TextLoader
+from langchain.document_loaders import DirectoryLoader
+
+
+async def classify_user_query(query, context, document_types):
+    llm = ChatOpenAI(temperature=0, model=config.model)
+    prompt_classify = ChatPromptTemplate.from_template(
+        """You are a classifier. You store user memories, thoughts and feelings. Determine if you need to use them to answer this query: {query}"""
+    )
+    json_structure = [
+        {
+            "name": "classifier",
+            "description": "Classification",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "UserQueryClassifier": {
+                        "type": "boolean",
+                        "description": "True if stored user memories are needed to answer the query.",
+                    }
+                },
+                "required": ["UserQueryClassifier"],
+            },
+        }
+    ]
+    chain_filter = prompt_classify | llm.bind(
+        function_call={"name": "classifier"}, functions=json_structure
+    )
+    classifier_output = await chain_filter.ainvoke(
+        {"query": query, "context": context, "document_types": document_types}
+    )
+    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
+    print("This is the arguments string", arguments_str)
+    arguments_dict = json.loads(arguments_str)
+    classifier_value = arguments_dict.get("UserQueryClassifier", None)
+
+    print("This is the classifier value", classifier_value)
+
+    return classifier_value
diff --git a/cognitive_architecture/database/create_database.py b/cognitive_architecture/database/create_database.py
index 29ac69a6e..3dddf7f9a 100644
--- a/cognitive_architecture/database/create_database.py
+++ b/cognitive_architecture/database/create_database.py
@@ -70,66 +70,4 @@ class DatabaseManager:
         async with self.engine.begin() as conn:
             await conn.run_sync(Base.metadata.create_all)
 
-if __name__ == "__main__":
-    async def main():
-        """Runs as a part of startup docker scripts to create the database and tables."""
-        dbconfig = DatabaseConfig(db_type=config.db_type, db_name=config.db_name)
-        db_manager = DatabaseManager(config=dbconfig)
-        database_name = dbconfig.db_name
-
-        if not await db_manager.database_exists(database_name):
-            print(f"Database {database_name} does not exist. Creating...")
-            await db_manager.create_database(database_name)
-            print(f"Database {database_name} created successfully.")
-
-        await db_manager.create_tables()
-
-    asyncio.run(main())
-#
-# def create_admin_engine(username, password, host, database_name):
-#     admin_url = f"postgresql://{username}:{password}@{host}:5432/{database_name}"
-#     return create_engine(admin_url)
-#
-# def database_exists(connection, db_name):
-#     query = text(f"SELECT 1 FROM pg_database WHERE datname='{db_name}'")
-#     result = connection.execute(query).fetchone()
-#     return result is not None
-#
-# def create_database(connection, db_name):
-#     connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-#     cursor = connection.cursor()
-#     cursor.execute(f"CREATE DATABASE {db_name}")
-#     cursor.close()
-#
-# def drop_database(connection, db_name):
-#     connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-#     cursor = connection.cursor()
-#     cursor.execute(f"DROP DATABASE IF EXISTS {db_name}")
-#     cursor.close()
-#
-#
-#
-# def create_tables(engine):
-#     Base.metadata.create_all(bind = engine)
-#
-# if __name__ == "__main__":
-#     host = os.environ.get('POSTGRES_HOST')
-#     username = os.environ.get('POSTGRES_USER')
-#     password = os.environ.get('POSTGRES_PASSWORD')
-#     database_name = os.environ.get('POSTGRES_DB')
-#
-#     engine = create_admin_engine(username, password, host, database_name)
-#     connection = engine.connect()
-#
-#     # print(Base.metadata.tables)
-#
-#     if not database_exists(connection, database_name):
Creating...") -# create_database(connection, database_name) -# logger.info(f"Database {database_name} created successfully.") -# -# connection.close() -# engine.dispose() -# -# create_tables(engine) diff --git a/cognitive_architecture/database/database_manager.py b/cognitive_architecture/database/database_manager.py new file mode 100644 index 000000000..454143f31 --- /dev/null +++ b/cognitive_architecture/database/database_manager.py @@ -0,0 +1,31 @@ +import asyncio +import logging + +from dotenv import load_dotenv + +from cognitive_architecture.config import Config +from cognitive_architecture.database.create_database import DatabaseManager +from cognitive_architecture.database.relationaldb.database import DatabaseConfig + +config = Config() +config.load() + +load_dotenv() +logger = logging.getLogger(__name__) +async def main(): + """Runs as a part of startup docker scripts to create the database and tables.""" + + dbconfig = DatabaseConfig(db_type=config.db_type, db_name=config.db_name) + db_manager = DatabaseManager(config=dbconfig) + database_name = dbconfig.db_name + + if not await db_manager.database_exists(database_name): + print(f"Database {database_name} does not exist. Creating...") + await db_manager.create_database(database_name) + print(f"Database {database_name} created successfully.") + + await db_manager.create_tables() + +if __name__ == "__main__": + + asyncio.run(main()) \ No newline at end of file diff --git a/cognitive_architecture/database/relationaldb/database.py b/cognitive_architecture/database/relationaldb/database.py index ba34bfd26..d6d0c89b6 100644 --- a/cognitive_architecture/database/relationaldb/database.py +++ b/cognitive_architecture/database/relationaldb/database.py @@ -78,8 +78,6 @@ AsyncSessionLocal = sessionmaker( Base = declarative_base() - -# Use asynccontextmanager to define an async context manager @asynccontextmanager async def get_db(): """Provide a database session to the context.""" diff --git a/main.py b/main.py index 42093a0c7..5b52bb8dc 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,6 @@ from neo4j.exceptions import Neo4jError from pydantic import BaseModel, Field from cognitive_architecture.database.graphdb.graph import Neo4jGraphDB from cognitive_architecture.database.relationaldb.models.memory import MemoryModel -from cognitive_architecture.classifiers.classifier import classify_documents import os from dotenv import load_dotenv from cognitive_architecture.database.relationaldb.database_crud import ( @@ -30,7 +29,10 @@ from cognitive_architecture.database.relationaldb.models.metadatas import MetaDa from cognitive_architecture.database.relationaldb.models.docs import DocsModel from cognitive_architecture.database.relationaldb.models.memory import MemoryModel from cognitive_architecture.database.relationaldb.models.user import User -from cognitive_architecture.classifiers.classifier import classify_call +from cognitive_architecture.classifiers.classify_summary import classify_summary +from cognitive_architecture.classifiers.classify_documents import classify_documents +from cognitive_architecture.classifiers.classify_user_query import classify_user_query +from cognitive_architecture.classifiers.classify_user_input import classify_user_input aclient = instructor.patch(OpenAI()) DEFAULT_PRESET = "promethai_chat" @@ -59,8 +61,6 @@ from cognitive_architecture.shared.language_processing import ( translate_text, detect_language, ) -from cognitive_architecture.classifiers.classifier import classify_user_input - async def 
 async def fetch_document_vectordb_namespace(
     session: AsyncSession, user_id: str, namespace_id: str, doc_id: str = None
@@ -553,7 +553,7 @@ async def user_context_enrichment(
 
     relevant_summary_id = None
     for _ in range(max_attempts):
-        relevant_summary_id = await classify_call(
+        relevant_summary_id = await classify_summary(
             query=query, document_summaries=str(summaries)
         )