SQLite works; made fixes in config so it becomes a basis, and added a few mods on top

This commit is contained in:
Vasilije 2024-02-17 10:54:30 +01:00
parent 91fe3f55a7
commit 2bb1da1487
10 changed files with 274 additions and 252 deletions

2
api.py
View file

@ -218,7 +218,7 @@ async def user_query_classfier(payload: Payload):
# Execute the query - replace this with the actual execution method
async with session_scope(session=AsyncSessionLocal()) as session:
from cognitive_architecture.classifiers.classifier import (
from cognitive_architecture.classifiers.classify_user_input import (
classify_user_query,
)

View file

@ -1,182 +0,0 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_documents(query: str, document_id: str, content: str):
    """Summarize and classify a document via an OpenAI function call.

    Args:
        query: The user query guiding summarization/classification.
        document_id: Identifier of the document, echoed back as ``d_id``.
        content: Raw document text used as classification context.

    Returns:
        dict: Parsed function-call arguments with keys
        ``DocumentCategory``, ``Title``, ``Summary`` and ``d_id``.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    document_context = content
    # Lazy %-formatting: the original passed the value as a bare extra
    # positional argument, which logging treats as a format arg and drops.
    logging.info("This is the document context %s", document_context)
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a summarizer and classifier. Determine what book this is and where does it belong in the output : {query}, Id: {d_id} Document context is: {context}"""
    )
    # Function schema the model is forced to call, so the reply is
    # machine-parseable JSON rather than free text.
    json_structure = [
        {
            "name": "summarizer",
            "description": "Summarization and classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "DocumentCategory": {
                        "type": "string",
                        "description": "The classification of documents in groups such as legal, medical, etc.",
                    },
                    "Title": {
                        "type": "string",
                        "description": "The title of the document",
                    },
                    "Summary": {
                        "type": "string",
                        "description": "The summary of the document",
                    },
                    "d_id": {"type": "string", "description": "The id of the document"},
                },
                "required": ["DocumentCategory", "Title", "Summary", "d_id"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "summarizer"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "d_id": document_id, "context": str(document_context)}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    # Use the module's logging (as the rest of the file does) instead of print.
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    return arguments_dict
# classify retrievals according to type of retrieval
def classify_retrieval():
    """Placeholder: classify retrievals by retrieval type (not implemented yet)."""
    pass
async def classify_user_input(query, input_type):
    """Decide whether *query* is relevant to a given memory category.

    Args:
        query: The user input to classify.
        input_type: The memory category the input is tested against.

    Returns:
        bool | None: The model's True/False verdict, or ``None`` when the
        response carries no ``InputClassification`` field.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. Determine with a True or False if the following input: {query}, is relevant for the following memory category: {input_type}"""
    )
    # Function schema forcing a structured boolean answer.
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "InputClassification": {
                        "type": "boolean",
                        "description": "The classification of the input",
                    }
                },
                "required": ["InputClassification"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "input_type": input_type}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    # Dropped the old "Relevant summary" log line: it read a
    # ``DocumentSummary`` key this schema never requests, so it always
    # logged None (copy-paste leftover from the summary classifier).
    input_classification = arguments_dict.get("InputClassification", None)
    logging.info("This is the classification %s", input_classification)
    return input_classification
# pick the most relevant document for a query from candidate summaries
async def classify_call(query, document_summaries):
    """Select the document most relevant to *query* from candidate summaries.

    Args:
        query: The user query to match against.
        document_summaries: Stringified document summaries with their ids.

    Returns:
        str | None: The ``d_id`` the model selected, or ``None`` when the
        response did not include one.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. Determine what document are relevant for the given query: {query}, Document summaries and ids:{document_summaries}"""
    )
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "DocumentSummary": {
                        "type": "string",
                        "description": "The summary of the document and the topic it deals with.",
                    },
                    "d_id": {"type": "string", "description": "The id of the document"},
                },
                "required": ["DocumentSummary"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "document_summaries": document_summaries}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    # Use logging consistently (the function previously mixed print and logging).
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    logging.info("Relevant summary is %s", arguments_dict.get("DocumentSummary", None))
    classifier_id = arguments_dict.get("d_id", None)
    logging.info("This is the classifier id %s", classifier_id)
    return classifier_id
async def classify_user_query(query, context, document_types):
    """Decide whether stored user memories are needed to answer *query*.

    Args:
        query: The user query to classify.
        context: Extra context passed to the chain (the prompt template
            itself only interpolates ``query``).
        document_types: Known document types (likewise not referenced by
            the template text).

    Returns:
        bool | None: The model's verdict, or ``None`` when absent.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. You store user memories, thoughts and feelings. Determine if you need to use them to answer this query : {query}"""
    )
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "UserQueryClassifier": {
                        # "bool" is not a valid JSON Schema type; "boolean" is.
                        "type": "boolean",
                        "description": "Whether stored user memories are needed to answer the query.",
                    }
                },
                # Fixed typo ("UserQueryClassiffier"): required must match the
                # property name exactly, otherwise the model is never obliged
                # to return the field read below.
                "required": ["UserQueryClassifier"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "context": context, "document_types": document_types}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    classifier_value = arguments_dict.get("UserQueryClassifier", None)
    logging.info("This is the classifier value %s", classifier_value)
    return classifier_value

View file

@ -0,0 +1,64 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_documents(query: str, document_id: str, content: str):
    """Summarize and classify a document via an OpenAI function call.

    Args:
        query: The user query guiding summarization/classification.
        document_id: Identifier of the document, echoed back as ``d_id``.
        content: Raw document text used as classification context.

    Returns:
        dict: Parsed function-call arguments with keys
        ``DocumentCategory``, ``Title``, ``Summary`` and ``d_id``.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    document_context = content
    # Lazy %-formatting: the original passed the value as a bare extra
    # positional argument, which logging treats as a format arg and drops.
    logging.info("This is the document context %s", document_context)
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a summarizer and classifier. Determine what book this is and where does it belong in the output : {query}, Id: {d_id} Document context is: {context}"""
    )
    # Function schema the model is forced to call, so the reply is
    # machine-parseable JSON rather than free text.
    json_structure = [
        {
            "name": "summarizer",
            "description": "Summarization and classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "DocumentCategory": {
                        "type": "string",
                        "description": "The classification of documents in groups such as legal, medical, etc.",
                    },
                    "Title": {
                        "type": "string",
                        "description": "The title of the document",
                    },
                    "Summary": {
                        "type": "string",
                        "description": "The summary of the document",
                    },
                    "d_id": {"type": "string", "description": "The id of the document"},
                },
                "required": ["DocumentCategory", "Title", "Summary", "d_id"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "summarizer"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "d_id": document_id, "context": str(document_context)}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    # Use the module's logging (as the rest of the file does) instead of print.
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    return arguments_dict

View file

@ -0,0 +1,62 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_summary(query, document_summaries):
    """Select the document most relevant to *query* from candidate summaries.

    Args:
        query: The user query to match against.
        document_summaries: Stringified document summaries with their ids.

    Returns:
        str | None: The ``d_id`` the model selected, or ``None`` when the
        response did not include one.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. Determine what document are relevant for the given query: {query}, Document summaries and ids:{document_summaries}"""
    )
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "DocumentSummary": {
                        "type": "string",
                        "description": "The summary of the document and the topic it deals with.",
                    },
                    "d_id": {"type": "string", "description": "The id of the document"},
                },
                "required": ["DocumentSummary"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "document_summaries": document_summaries}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    # Use logging consistently (the function previously mixed print and logging).
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    logging.info("Relevant summary is %s", arguments_dict.get("DocumentSummary", None))
    classifier_id = arguments_dict.get("d_id", None)
    logging.info("This is the classifier id %s", classifier_id)
    return classifier_id

View file

@ -0,0 +1,55 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_user_input(query, input_type):
    """Decide whether *query* is relevant to a given memory category.

    Args:
        query: The user input to classify.
        input_type: The memory category the input is tested against.

    Returns:
        bool | None: The model's True/False verdict, or ``None`` when the
        response carries no ``InputClassification`` field.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. Determine with a True or False if the following input: {query}, is relevant for the following memory category: {input_type}"""
    )
    # Function schema forcing a structured boolean answer.
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "InputClassification": {
                        "type": "boolean",
                        "description": "The classification of the input",
                    }
                },
                "required": ["InputClassification"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "input_type": input_type}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    # Dropped the old "Relevant summary" log line: it read a
    # ``DocumentSummary`` key this schema never requests, so it always
    # logged None (copy-paste leftover from the summary classifier).
    input_classification = arguments_dict.get("InputClassification", None)
    logging.info("This is the classification %s", input_classification)
    return input_classification

View file

@ -0,0 +1,56 @@
import logging
from langchain.prompts import ChatPromptTemplate
import json
# TO DO, ADD ALL CLASSIFIERS HERE
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
async def classify_user_query(query, context, document_types):
    """Decide whether stored user memories are needed to answer *query*.

    Args:
        query: The user query to classify.
        context: Extra context passed to the chain (the prompt template
            itself only interpolates ``query``).
        document_types: Known document types (likewise not referenced by
            the template text).

    Returns:
        bool | None: The model's verdict, or ``None`` when absent.

    Raises:
        KeyError: If the model response carries no ``function_call``.
        json.JSONDecodeError: If the returned arguments are malformed.
    """
    llm = ChatOpenAI(temperature=0, model=config.model)
    prompt_classify = ChatPromptTemplate.from_template(
        """You are a classifier. You store user memories, thoughts and feelings. Determine if you need to use them to answer this query : {query}"""
    )
    json_structure = [
        {
            "name": "classifier",
            "description": "Classification",
            "parameters": {
                "type": "object",
                "properties": {
                    "UserQueryClassifier": {
                        # "bool" is not a valid JSON Schema type; "boolean" is.
                        "type": "boolean",
                        "description": "Whether stored user memories are needed to answer the query.",
                    }
                },
                # Fixed typo ("UserQueryClassiffier"): required must match the
                # property name exactly, otherwise the model is never obliged
                # to return the field read below.
                "required": ["UserQueryClassifier"],
            },
        }
    ]
    chain_filter = prompt_classify | llm.bind(
        function_call={"name": "classifier"}, functions=json_structure
    )
    classifier_output = await chain_filter.ainvoke(
        {"query": query, "context": context, "document_types": document_types}
    )
    arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"]
    logging.info("This is the arguments string %s", arguments_str)
    arguments_dict = json.loads(arguments_str)
    classifier_value = arguments_dict.get("UserQueryClassifier", None)
    logging.info("This is the classifier value %s", classifier_value)
    return classifier_value

View file

@ -70,66 +70,4 @@ class DatabaseManager:
async with self.engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
if __name__ == "__main__":

    async def main():
        """Runs as a part of startup docker scripts to create the database and tables."""
        # Build the DB config from the already-loaded application config.
        dbconfig = DatabaseConfig(db_type=config.db_type, db_name=config.db_name)
        db_manager = DatabaseManager(config=dbconfig)
        database_name = dbconfig.db_name
        # Create the database only when missing, then always ensure tables exist.
        if not await db_manager.database_exists(database_name):
            print(f"Database {database_name} does not exist. Creating...")
            await db_manager.create_database(database_name)
            print(f"Database {database_name} created successfully.")
        await db_manager.create_tables()

    # Entry point for the startup script; runs the async bootstrap to completion.
    asyncio.run(main())
#
# def create_admin_engine(username, password, host, database_name):
# admin_url = f"postgresql://{username}:{password}@{host}:5432/{database_name}"
# return create_engine(admin_url)
#
# def database_exists(connection, db_name):
# query = text(f"SELECT 1 FROM pg_database WHERE datname='{db_name}'")
# result = connection.execute(query).fetchone()
# return result is not None
#
# def create_database(connection, db_name):
# connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
# cursor = connection.cursor()
# cursor.execute(f"CREATE DATABASE {db_name}")
# cursor.close()
#
# def drop_database(connection, db_name):
# connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
# cursor = connection.cursor()
# cursor.execute(f"DROP DATABASE IF EXISTS {db_name}")
# cursor.close()
#
#
#
# def create_tables(engine):
# Base.metadata.create_all(bind = engine)
#
# if __name__ == "__main__":
# host = os.environ.get('POSTGRES_HOST')
# username = os.environ.get('POSTGRES_USER')
# password = os.environ.get('POSTGRES_PASSWORD')
# database_name = os.environ.get('POSTGRES_DB')
#
# engine = create_admin_engine(username, password, host, database_name)
# connection = engine.connect()
#
# # print(Base.metadata.tables)
#
# if not database_exists(connection, database_name):
# logger.info(f"Database {database_name} does not exist. Creating...")
# create_database(connection, database_name)
# logger.info(f"Database {database_name} created successfully.")
#
# connection.close()
# engine.dispose()
#
# create_tables(engine)

View file

@ -0,0 +1,31 @@
import asyncio
import logging
from dotenv import load_dotenv
from cognitive_architecture.config import Config
from cognitive_architecture.database.create_database import DatabaseManager
from cognitive_architecture.database.relationaldb.database import DatabaseConfig
config = Config()
config.load()
load_dotenv()
logger = logging.getLogger(__name__)
async def main():
    """Runs as a part of startup docker scripts to create the database and tables."""
    # Build the DB config from the already-loaded application config.
    dbconfig = DatabaseConfig(db_type=config.db_type, db_name=config.db_name)
    db_manager = DatabaseManager(config=dbconfig)
    database_name = dbconfig.db_name
    # Create the database only when missing, then always ensure tables exist.
    if not await db_manager.database_exists(database_name):
        print(f"Database {database_name} does not exist. Creating...")
        await db_manager.create_database(database_name)
        print(f"Database {database_name} created successfully.")
    await db_manager.create_tables()


if __name__ == "__main__":
    # Entry point for the startup script; runs the async bootstrap to completion.
    asyncio.run(main())

View file

@ -78,8 +78,6 @@ AsyncSessionLocal = sessionmaker(
Base = declarative_base()
# Use asynccontextmanager to define an async context manager
@asynccontextmanager
async def get_db():
"""Provide a database session to the context."""

10
main.py
View file

@ -4,7 +4,6 @@ from neo4j.exceptions import Neo4jError
from pydantic import BaseModel, Field
from cognitive_architecture.database.graphdb.graph import Neo4jGraphDB
from cognitive_architecture.database.relationaldb.models.memory import MemoryModel
from cognitive_architecture.classifiers.classifier import classify_documents
import os
from dotenv import load_dotenv
from cognitive_architecture.database.relationaldb.database_crud import (
@ -30,7 +29,10 @@ from cognitive_architecture.database.relationaldb.models.metadatas import MetaDa
from cognitive_architecture.database.relationaldb.models.docs import DocsModel
from cognitive_architecture.database.relationaldb.models.memory import MemoryModel
from cognitive_architecture.database.relationaldb.models.user import User
from cognitive_architecture.classifiers.classifier import classify_call
from cognitive_architecture.classifiers.classify_summary import classify_summary
from cognitive_architecture.classifiers.classify_documents import classify_documents
from cognitive_architecture.classifiers.classify_user_query import classify_user_query
from cognitive_architecture.classifiers.classify_user_input import classify_user_input
aclient = instructor.patch(OpenAI())
DEFAULT_PRESET = "promethai_chat"
@ -59,8 +61,6 @@ from cognitive_architecture.shared.language_processing import (
translate_text,
detect_language,
)
from cognitive_architecture.classifiers.classifier import classify_user_input
async def fetch_document_vectordb_namespace(
session: AsyncSession, user_id: str, namespace_id: str, doc_id: str = None
@ -553,7 +553,7 @@ async def user_context_enrichment(
relevant_summary_id = None
for _ in range(max_attempts):
relevant_summary_id = await classify_call(
relevant_summary_id = await classify_summary(
query=query, document_summaries=str(summaries)
)