SQLite works; made fixes in config so it becomes a basis, and added a few mods on top

This commit is contained in:
Vasilije 2024-02-17 10:54:30 +01:00
parent 91fe3f55a7
commit 2bb1da1487
10 changed files with 274 additions and 252 deletions

2
api.py
View file

@ -218,7 +218,7 @@ async def user_query_classfier(payload: Payload):
# Execute the query - replace this with the actual execution method
async with session_scope(session=AsyncSessionLocal()) as session:
from cognitive_architecture.classifiers.classifier import (
from cognitive_architecture.classifiers.classify_user_input import (
classify_user_query,
)

View file

@ -1,182 +0,0 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_documents(query: str, document_id: str, content: str):
    """Summarize and classify a document via an OpenAI function call.

    Args:
        query: The user query guiding summarization/classification.
        document_id: Identifier of the document, echoed back as ``d_id``.
        content: Raw document text used as classification context.

    Returns:
        dict: Parsed function-call arguments with keys
        ``DocumentCategory``, ``Title``, ``Summary`` and ``d_id``.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    document_context = content
    # Lazy %-formatting: the original passed the value as a bare extra
    # positional argument, which logging treats as a format arg and drops.
    logging.info("This is the document context %s", document_context)
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a summarizer and classifier. Determine what book this is and where does it belong in the output : {query}, Id: {d_id} Document context is: {context}"""
    )
    # Function schema the model is forced to call, so the reply is
    # machine-parseable JSON rather than free text.
    json_structure = [
        {
            "name": "summarizer",
            "description": "Summarization and classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "DocumentCategory": {
                        "type": "string",
                        "description": "The classification of documents in groups such as legal, medical, etc.",
                    },
                    "Title": {
                        "type": "string",
                        "description": "The title of the document",
                    },
                    "Summary": {
                        "type": "string",
                        "description": "The summary of the document",
                    },
                    "d_id": {"type": "string", "description": "The id of the document"},
                },
                "required": ["DocumentCategory", "Title", "Summary", "d_id"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "summarizer"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "d_id": document_id, "context": str(document_context)}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    # Use the module's logging (as the rest of the file does) instead of print.
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    return arguments_dict
# classify retrievals according to type of retrieval
def classify_retrieval():
    """Placeholder: classify retrievals by retrieval type (not implemented yet)."""
    pass
async def classify_user_input(query, input_type):
    """Decide whether *query* is relevant to a given memory category.

    Args:
        query: The user input to classify.
        input_type: The memory category the input is tested against.

    Returns:
        bool | None: The model's True/False verdict, or ``None`` when the
        response carries no ``InputClassification`` field.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. Determine with a True or False if the following input: {query}, is relevant for the following memory category: {input_type}"""
    )
    # Function schema forcing a structured boolean answer.
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "InputClassification": {
                        "type": "boolean",
                        "description": "The classification of the input",
                    }
                },
                "required": ["InputClassification"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "input_type": input_type}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    # Dropped the old "Relevant summary" log line: it read a
    # ``DocumentSummary`` key this schema never requests, so it always
    # logged None (copy-paste leftover from the summary classifier).
    input_classification = arguments_dict.get("InputClassification", None)
    logging.info("This is the classification %s", input_classification)
    return input_classification
# pick the most relevant document for a query from candidate summaries
async def classify_call(query, document_summaries):
    """Select the document most relevant to *query* from candidate summaries.

    Args:
        query: The user query to match against.
        document_summaries: Stringified document summaries with their ids.

    Returns:
        str | None: The ``d_id`` the model selected, or ``None`` when the
        response did not include one.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. Determine what document are relevant for the given query: {query}, Document summaries and ids:{document_summaries}"""
    )
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "DocumentSummary": {
                        "type": "string",
                        "description": "The summary of the document and the topic it deals with.",
                    },
                    "d_id": {"type": "string", "description": "The id of the document"},
                },
                "required": ["DocumentSummary"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "document_summaries": document_summaries}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    # Use logging consistently (the function previously mixed print and logging).
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    logging.info("Relevant summary is %s", arguments_dict.get("DocumentSummary", None))
    classifier_id = arguments_dict.get("d_id", None)
    logging.info("This is the classifier id %s", classifier_id)
    return classifier_id
async def classify_user_query(query, context, document_types):
    """Decide whether stored user memories are needed to answer *query*.

    Args:
        query: The user query to classify.
        context: Extra context passed to the chain (the prompt template
            itself only interpolates ``query``).
        document_types: Known document types (likewise not referenced by
            the template text).

    Returns:
        bool | None: The model's verdict, or ``None`` when absent.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. You store user memories, thoughts and feelings. Determine if you need to use them to answer this query : {query}"""
    )
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "UserQueryClassifier": {
                        # "bool" is not a valid JSON Schema type; "boolean" is.
                        "type": "boolean",
                        "description": "Whether stored user memories are needed to answer the query.",
                    }
                },
                # Fixed typo ("UserQueryClassiffier"): required must match the
                # property name exactly, otherwise the model is never obliged
                # to return the field read below.
                "required": ["UserQueryClassifier"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "context": context, "document_types": document_types}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    classifier_value = arguments_dict.get("UserQueryClassifier", None)
    logging.info("This is the classifier value %s", classifier_value)
    return classifier_value

View file

@ -0,0 +1,64 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_documents(query: str, document_id: str, content: str):
    """Summarize and classify a document via an OpenAI function call.

    Args:
        query: The user query guiding summarization/classification.
        document_id: Identifier of the document, echoed back as ``d_id``.
        content: Raw document text used as classification context.

    Returns:
        dict: Parsed function-call arguments with keys
        ``DocumentCategory``, ``Title``, ``Summary`` and ``d_id``.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    document_context = content
    # Lazy %-formatting: the original passed the value as a bare extra
    # positional argument, which logging treats as a format arg and drops.
    logging.info("This is the document context %s", document_context)
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a summarizer and classifier. Determine what book this is and where does it belong in the output : {query}, Id: {d_id} Document context is: {context}"""
    )
    # Function schema the model is forced to call, so the reply is
    # machine-parseable JSON rather than free text.
    json_structure = [
        {
            "name": "summarizer",
            "description": "Summarization and classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "DocumentCategory": {
                        "type": "string",
                        "description": "The classification of documents in groups such as legal, medical, etc.",
                    },
                    "Title": {
                        "type": "string",
                        "description": "The title of the document",
                    },
                    "Summary": {
                        "type": "string",
                        "description": "The summary of the document",
                    },
                    "d_id": {"type": "string", "description": "The id of the document"},
                },
                "required": ["DocumentCategory", "Title", "Summary", "d_id"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "summarizer"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "d_id": document_id, "context": str(document_context)}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    # Use the module's logging (as the rest of the file does) instead of print.
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    return arguments_dict

View file

@ -0,0 +1,62 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_summary(query, document_summaries):
    """Select the document most relevant to *query* from candidate summaries.

    Args:
        query: The user query to match against.
        document_summaries: Stringified document summaries with their ids.

    Returns:
        str | None: The ``d_id`` the model selected, or ``None`` when the
        response did not include one.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. Determine what document are relevant for the given query: {query}, Document summaries and ids:{document_summaries}"""
    )
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "DocumentSummary": {
                        "type": "string",
                        "description": "The summary of the document and the topic it deals with.",
                    },
                    "d_id": {"type": "string", "description": "The id of the document"},
                },
                "required": ["DocumentSummary"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "document_summaries": document_summaries}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    # Use logging consistently (the function previously mixed print and logging).
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    logging.info("Relevant summary is %s", arguments_dict.get("DocumentSummary", None))
    classifier_id = arguments_dict.get("d_id", None)
    logging.info("This is the classifier id %s", classifier_id)
    return classifier_id

View file

@ -0,0 +1,55 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_user_input(query, input_type):
    """Decide whether *query* is relevant to a given memory category.

    Args:
        query: The user input to classify.
        input_type: The memory category the input is tested against.

    Returns:
        bool | None: The model's True/False verdict, or ``None`` when the
        response carries no ``InputClassification`` field.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. Determine with a True or False if the following input: {query}, is relevant for the following memory category: {input_type}"""
    )
    # Function schema forcing a structured boolean answer.
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "InputClassification": {
                        "type": "boolean",
                        "description": "The classification of the input",
                    }
                },
                "required": ["InputClassification"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "input_type": input_type}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    # Dropped the old "Relevant summary" log line: it read a
    # ``DocumentSummary`` key this schema never requests, so it always
    # logged None (copy-paste leftover from the summary classifier).
    input_classification = arguments_dict.get("InputClassification", None)
    logging.info("This is the classification %s", input_classification)
    return input_classification

View file

@ -0,0 +1,56 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_user_query(query, context, document_types):
    """Decide whether stored user memories are needed to answer *query*.

    Args:
        query: The user query to classify.
        context: Extra context passed to the chain (the prompt template
            itself only interpolates ``query``).
        document_types: Known document types (likewise not referenced by
            the template text).

    Returns:
        bool | None: The model's verdict, or ``None`` when absent.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. You store user memories, thoughts and feelings. Determine if you need to use them to answer this query : {query}"""
    )
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "UserQueryClassifier": {
                        # "bool" is not a valid JSON Schema type; "boolean" is.
                        "type": "boolean",
                        "description": "Whether stored user memories are needed to answer the query.",
                    }
                },
                # Fixed typo ("UserQueryClassiffier"): required must match the
                # property name exactly, otherwise the model is never obliged
                # to return the field read below.
                "required": ["UserQueryClassifier"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "context": context, "document_types": document_types}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    classifier_value = arguments_dict.get("UserQueryClassifier", None)
    logging.info("This is the classifier value %s", classifier_value)
    return classifier_value

View file

@ -70,66 +70,4 @@ class DatabaseManager:
async with self.engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
if __name__ == "__main__":

    async def main():
        """Runs as a part of startup docker scripts to create the database and tables."""
        # Build the DB config from the already-loaded application config.
        dbconfig = DatabaseConfig(db_type=config.db_type, db_name=config.db_name)
        db_manager = DatabaseManager(config=dbconfig)
        database_name = dbconfig.db_name
        # Create the database only when missing, then always ensure tables exist.
        if not await db_manager.database_exists(database_name):
            print(f"Database {database_name} does not exist. Creating...")
            await db_manager.create_database(database_name)
            print(f"Database {database_name} created successfully.")
        await db_manager.create_tables()

    # Entry point for the startup script; runs the async bootstrap to completion.
    asyncio.run(main())
#
# def create_admin_engine(username, password, host, database_name):
# admin_url = f"postgresql://{username}:{password}@{host}:5432/{database_name}"
# return create_engine(admin_url)
#
# def database_exists(connection, db_name):
# query = text(f"SELECT 1 FROM pg_database WHERE datname='{db_name}'")
# result = connection.execute(query).fetchone()
# return result is not None
#
# def create_database(connection, db_name):
# connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
# cursor = connection.cursor()
# cursor.execute(f"CREATE DATABASE {db_name}")
# cursor.close()
#
# def drop_database(connection, db_name):
# connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
# cursor = connection.cursor()
# cursor.execute(f"DROP DATABASE IF EXISTS {db_name}")
# cursor.close()
#
#
#
# def create_tables(engine):
# Base.metadata.create_all(bind = engine)
#
# if __name__ == "__main__":
# host = os.environ.get('POSTGRES_HOST')
# username = os.environ.get('POSTGRES_USER')
# password = os.environ.get('POSTGRES_PASSWORD')
# database_name = os.environ.get('POSTGRES_DB')
#
# engine = create_admin_engine(username, password, host, database_name)
# connection = engine.connect()
#
# # print(Base.metadata.tables)
#
# if not database_exists(connection, database_name):
# logger.info(f"Database {database_name} does not exist. Creating...")
# create_database(connection, database_name)
# logger.info(f"Database {database_name} created successfully.")
#
# connection.close()
# engine.dispose()
#
# create_tables(engine)

View file

@ -0,0 +1,31 @@
import asyncio
import logging
from dotenv import load_dotenv
from cognitive_architecture.config import Config
from cognitive_architecture.database.create_database import DatabaseManager
from cognitive_architecture.database.relationaldb.database import DatabaseConfig
config = Config()
config.load()
load_dotenv()
logger = logging.getLogger(__name__)
async def main():
    """Runs as a part of startup docker scripts to create the database and tables."""
    # Build the DB config from the already-loaded application config.
    dbconfig = DatabaseConfig(db_type=config.db_type, db_name=config.db_name)
    db_manager = DatabaseManager(config=dbconfig)
    database_name = dbconfig.db_name
    # Create the database only when missing, then always ensure tables exist.
    if not await db_manager.database_exists(database_name):
        print(f"Database {database_name} does not exist. Creating...")
        await db_manager.create_database(database_name)
        print(f"Database {database_name} created successfully.")
    await db_manager.create_tables()


if __name__ == "__main__":
    # Entry point for the startup script; runs the async bootstrap to completion.
    asyncio.run(main())

View file

@ -78,8 +78,6 @@ AsyncSessionLocal = sessionmaker(
Base = declarative_base()
# Use asynccontextmanager to define an async context manager
@asynccontextmanager
async def get_db():
"""Provide a database session to the context."""

10
main.py
View file

@ -4,7 +4,6 @@ from neo4j.exceptions import Neo4jError
from pydantic import BaseModel, Field
from cognitive_architecture.database.graphdb.graph import Neo4jGraphDB
from cognitive_architecture.database.relationaldb.models.memory import MemoryModel
from cognitive_architecture.classifiers.classifier import classify_documents
import os
from dotenv import load_dotenv
from cognitive_architecture.database.relationaldb.database_crud import (
@ -30,7 +29,10 @@ from cognitive_architecture.database.relationaldb.models.metadatas import MetaDa
from cognitive_architecture.database.relationaldb.models.docs import DocsModel
from cognitive_architecture.database.relationaldb.models.memory import MemoryModel
from cognitive_architecture.database.relationaldb.models.user import User
from cognitive_architecture.classifiers.classifier import classify_call
from cognitive_architecture.classifiers.classify_summary import classify_summary
from cognitive_architecture.classifiers.classify_documents import classify_documents
from cognitive_architecture.classifiers.classify_user_query import classify_user_query
from cognitive_architecture.classifiers.classify_user_input import classify_user_input
aclient = instructor.patch(OpenAI())
DEFAULT_PRESET = "promethai_chat"
@ -59,8 +61,6 @@ from cognitive_architecture.shared.language_processing import (
translate_text,
detect_language,
)
from cognitive_architecture.classifiers.classifier import classify_user_input
async def fetch_document_vectordb_namespace(
session: AsyncSession, user_id: str, namespace_id: str, doc_id: str = None
@ -553,7 +553,7 @@ async def user_context_enrichment(
relevant_summary_id = None
for _ in range(max_attempts):
relevant_summary_id = await classify_call(
relevant_summary_id = await classify_summary(
query=query, document_summaries=str(summaries)
)