From 0a38e09b3faf41909c951444a47c1de11ffed09d Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Sun, 18 Feb 2024 20:10:08 +0100 Subject: [PATCH] Fixes to memory component --- .../classifiers/classify_documents.py | 21 +++++++++++-------- .../classifiers/classify_summary.py | 18 +++++++--------- .../classifiers/classify_user_input.py | 12 +++++------ .../classifiers/classify_user_query.py | 21 ++++++++----------- .../database/graphdb/networkx_graph.py | 2 +- 5 files changed, 36 insertions(+), 38 deletions(-) diff --git a/cognitive_architecture/classifiers/classify_documents.py b/cognitive_architecture/classifiers/classify_documents.py index d97d8ee1f..a1e6872fc 100644 --- a/cognitive_architecture/classifiers/classify_documents.py +++ b/cognitive_architecture/classifiers/classify_documents.py @@ -1,11 +1,10 @@ +""" This module contains the classifiers for the documents. """ import logging from langchain.prompts import ChatPromptTemplate import json - -# TO DO, ADD ALL CLASSIFIERS HERE - - +from langchain.document_loaders import TextLoader +from langchain.document_loaders import DirectoryLoader from langchain.chains import create_extraction_chain from langchain.chat_models import ChatOpenAI @@ -15,17 +14,19 @@ from ..database.vectordb.loaders.loaders import _document_loader config = Config() config.load() OPENAI_API_KEY = config.openai_key -from langchain.document_loaders import TextLoader -from langchain.document_loaders import DirectoryLoader + async def classify_documents(query: str, document_id: str, content: str): + """Classify the documents based on the query and content.""" document_context = content logging.info("This is the document context", document_context) llm = ChatOpenAI(temperature=0, model=config.model) prompt_classify = ChatPromptTemplate.from_template( - """You are a summarizer and classifier. Determine what book this is and where does it belong in the output : {query}, Id: {d_id} Document context is: {context}""" + """You are a summarizer and classifier. + Determine what book this is and where does it belong in the output : + {query}, Id: {d_id} Document context is: {context}""" ) json_structure = [ { @@ -36,7 +37,8 @@ async def classify_documents(query: str, document_id: str, content: str): "properties": { "DocumentCategory": { "type": "string", - "description": "The classification of documents in groups such as legal, medical, etc.", + "description": "The classification of documents " + "in groups such as legal, medical, etc.", }, "Title": { "type": "string", @@ -58,7 +60,8 @@ async def classify_documents(query: str, document_id: str, content: str): classifier_output = await chain_filter.ainvoke( {"query": query, "d_id": document_id, "context": str(document_context)} ) + arguments_str = classifier_output.additional_kwargs["function_call"]["arguments"] logging.info("This is the arguments string %s", arguments_str) arguments_dict = json.loads(arguments_str) - return arguments_dict \ No newline at end of file + return arguments_dict diff --git a/cognitive_architecture/classifiers/classify_summary.py b/cognitive_architecture/classifiers/classify_summary.py index d437c658f..9ce7b81db 100644 --- a/cognitive_architecture/classifiers/classify_summary.py +++ b/cognitive_architecture/classifiers/classify_summary.py @@ -1,22 +1,16 @@ +""" This module contains the function to classify a summary of a document. """ import logging from langchain.prompts import ChatPromptTemplate import json - -# TO DO, ADD ALL CLASSIFIERS HERE - - from langchain.chains import create_extraction_chain from langchain.chat_models import ChatOpenAI from ..config import Config -from ..database.vectordb.loaders.loaders import _document_loader config = Config() config.load() OPENAI_API_KEY = config.openai_key -from langchain.document_loaders import TextLoader -from langchain.document_loaders import DirectoryLoader @@ -24,9 +18,12 @@ from langchain.document_loaders import DirectoryLoader async def classify_summary(query, document_summaries): + """Classify the documents based on the query and content.""" llm = ChatOpenAI(temperature=0, model=config.model) prompt_classify = ChatPromptTemplate.from_template( - """You are a classifier. Determine what document are relevant for the given query: {query}, Document summaries and ids:{document_summaries}""" + """You are a classifier. Determine what document + are relevant for the given query: {query}, + Document summaries and ids:{document_summaries}""" ) json_structure = [ { @@ -37,7 +34,8 @@ async def classify_summary(query, document_summaries): "properties": { "DocumentSummary": { "type": "string", - "description": "The summary of the document and the topic it deals with.", + "description": "The summary of the document " + "and the topic it deals with.", }, "d_id": {"type": "string", "description": "The id of the document"}, }, @@ -59,4 +57,4 @@ async def classify_summary(query, document_summaries): logging.info("This is the classifier id %s", classfier_id) - return classfier_id \ No newline at end of file + return classfier_id diff --git a/cognitive_architecture/classifiers/classify_user_input.py b/cognitive_architecture/classifiers/classify_user_input.py index 840039ee4..24054dcdd 100644 --- a/cognitive_architecture/classifiers/classify_user_input.py +++ b/cognitive_architecture/classifiers/classify_user_input.py @@ -1,10 +1,10 @@ +""" This module contains the classifiers for the documents. """ + import logging from langchain.prompts import ChatPromptTemplate import json -# TO DO, ADD ALL CLASSIFIERS HERE - from langchain.chains import create_extraction_chain from langchain.chat_models import ChatOpenAI @@ -15,14 +15,14 @@ from ..database.vectordb.loaders.loaders import _document_loader config = Config() config.load() OPENAI_API_KEY = config.openai_key -from langchain.document_loaders import TextLoader -from langchain.document_loaders import DirectoryLoader - async def classify_user_input(query, input_type): + """ Classify the user input based on the query and input type.""" llm = ChatOpenAI(temperature=0, model=config.model) prompt_classify = ChatPromptTemplate.from_template( - """You are a classifier. Determine with a True or False if the following input: {query}, is relevant for the following memory category: {input_type}""" + """You are a classifier. + Determine with a True or False if the following input: {query}, + is relevant for the following memory category: {input_type}""" ) json_structure = [ { diff --git a/cognitive_architecture/classifiers/classify_user_query.py b/cognitive_architecture/classifiers/classify_user_query.py index bbef1b137..157c70751 100644 --- a/cognitive_architecture/classifiers/classify_user_query.py +++ b/cognitive_architecture/classifiers/classify_user_query.py @@ -1,13 +1,10 @@ -import logging - +""" This module contains the function to classify the user query. """ from langchain.prompts import ChatPromptTemplate import json - -# TO DO, ADD ALL CLASSIFIERS HERE - - from langchain.chains import create_extraction_chain from langchain.chat_models import ChatOpenAI +from langchain.document_loaders import TextLoader +from langchain.document_loaders import DirectoryLoader from ..config import Config from ..database.vectordb.loaders.loaders import _document_loader @@ -15,14 +12,15 @@ from ..database.vectordb.loaders.loaders import _document_loader config = Config() config.load() OPENAI_API_KEY = config.openai_key -from langchain.document_loaders import TextLoader -from langchain.document_loaders import DirectoryLoader async def classify_user_query(query, context, document_types): + """Classify the user query based on the context and document types.""" llm = ChatOpenAI(temperature=0, model=config.model) prompt_classify = ChatPromptTemplate.from_template( - """You are a classifier. You store user memories, thoughts and feelings. Determine if you need to use them to answer this query : {query}""" + """You are a classifier. + You store user memories, thoughts and feelings. + Determine if you need to use them to answer this query : {query}""" ) json_structure = [ { @@ -33,7 +31,8 @@ async def classify_user_query(query, context, document_types): "properties": { "UserQueryClassifier": { "type": "bool", - "description": "The classification of documents in groups such as legal, medical, etc.", + "description": "The classification of documents " + "in groups such as legal, medical, etc.", } }, "required": ["UserQueryClassifier"], @@ -50,7 +49,5 @@ async def classify_user_query(query, context, document_types): print("This is the arguments string", arguments_str) arguments_dict = json.loads(arguments_str) classfier_value = arguments_dict.get("UserQueryClassifier", None) - print("This is the classifier value", classfier_value) - return classfier_value diff --git a/cognitive_architecture/database/graphdb/networkx_graph.py b/cognitive_architecture/database/graphdb/networkx_graph.py index 4e63e39ef..ae005ccdf 100644 --- a/cognitive_architecture/database/graphdb/networkx_graph.py +++ b/cognitive_architecture/database/graphdb/networkx_graph.py @@ -4,7 +4,7 @@ import networkx as nx class NetworkXGraphDB: - def __init__(self, filename="networkx_graph.pkl"): + def __init__(self, filename="cognee_graph.pkl"): self.filename = filename try: self.graph = self.load_graph() # Attempt to load an existing graph