From 39140620f66784a39b316e23eae8b36028634c2b Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 23 Aug 2023 00:14:40 +0200 Subject: [PATCH 01/12] added extended logic --- level_2/api.py | 167 ++++--- .../level_2_pdf_vectorstore__dlt_contracts.py | 442 ++++++++++++------ 2 files changed, 389 insertions(+), 220 deletions(-) diff --git a/level_2/api.py b/level_2/api.py index d2cc74e6c..e79309de4 100644 --- a/level_2/api.py +++ b/level_2/api.py @@ -1,3 +1,5 @@ +from io import BytesIO + from langchain.document_loaders import PyPDFLoader from level_2_pdf_vectorstore__dlt_contracts import Memory @@ -27,7 +29,7 @@ from dotenv import load_dotenv load_dotenv() - +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") app = FastAPI(debug=True) @@ -63,82 +65,73 @@ def health_check(): #curl -X POST -H "Content-Type: application/json" -d '{"data": "YourPayload"}' -F "files=@/path/to/your/pdf/file.pdf" http://127.0.0.1:8000/upload/ -from fastapi import FastAPI, UploadFile, File -import requests -import os -import json - -app = FastAPI() - - -from io import BytesIO - class Payload(BaseModel): payload: Dict[str, Any] -@app.post("/upload/", response_model=dict) -async def upload_pdf_and_payload( - payload: Payload, - # files: List[UploadFile] = File(...), -): - try: - # Process the payload - decoded_payload = payload.payload - # except: - # pass - # - # return JSONResponse(content={"response": decoded_payload}, status_code=200) - - # Download the remote PDF if URL is provided - if 'pdf_url' in decoded_payload: - pdf_response = requests.get(decoded_payload['pdf_url']) - pdf_content = pdf_response.content - - logging.info("Downloaded PDF from URL") - - # Create an in-memory file-like object for the PDF content - pdf_stream = BytesIO(pdf_content) - - contents = pdf_stream.read() - - tmp_location = os.path.join('/tmp', "tmp.pdf") - with open(tmp_location, 'wb') as tmp_file: - tmp_file.write(contents) - - logging.info("Wrote PDF from URL") - - # Process the PDF using PyPDFLoader - loader = PyPDFLoader(tmp_location) - pages = loader.load_and_split() - logging.info(" PDF split into pages") - Memory_ = Memory(index_name="my-agent", user_id='555' ) - await Memory_.async_init() - Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content =pages) - - - # Run the buffer - response = Memory_._run_buffer(user_input="I want to get a schema for my data") - return JSONResponse(content={"response": response}, status_code=200) - - #to do: add the user id to the payload - #to do add the raw pdf to payload - # bb = await Memory_._run_buffer(user_input=decoded_payload['prompt']) - # print(bb) - - - except Exception as e: - - return {"error": str(e)} - # Here you can perform your processing on the PDF contents - # results.append({"filename": file.filename, "size": len(contents)}) - - # Append the in-memory file to the files list - # files.append(UploadFile(pdf_stream, filename="downloaded.pdf")) - +# @app.post("/upload/", response_model=dict) +# async def upload_pdf_and_payload( +# payload: Payload, +# # files: List[UploadFile] = File(...), +# ): +# try: +# # Process the payload +# decoded_payload = payload.payload +# # except: +# # pass +# # +# # return JSONResponse(content={"response": decoded_payload}, status_code=200) +# +# # Download the remote PDF if URL is provided +# if 'pdf_url' in decoded_payload: +# pdf_response = requests.get(decoded_payload['pdf_url']) +# pdf_content = pdf_response.content +# +# logging.info("Downloaded PDF from URL") +# +# # 
Create an in-memory file-like object for the PDF content +# pdf_stream = BytesIO(pdf_content) +# +# contents = pdf_stream.read() +# +# tmp_location = os.path.join('/tmp', "tmp.pdf") +# with open(tmp_location, 'wb') as tmp_file: +# tmp_file.write(contents) +# +# logging.info("Wrote PDF from URL") +# +# # Process the PDF using PyPDFLoader +# loader = PyPDFLoader(tmp_location) +# pages = loader.load_and_split() +# logging.info(" PDF split into pages") +# Memory_ = Memory(index_name="my-agent", user_id='555' ) +# await Memory_.async_init() +# Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content =pages) +# +# +# # Run the buffer +# response = Memory_._run_buffer(user_input="I want to get a schema for my data") +# return JSONResponse(content={"response": response}, status_code=200) +# +# #to do: add the user id to the payload +# #to do add the raw pdf to payload +# # bb = await Memory_._run_buffer(user_input=decoded_payload['prompt']) +# # print(bb) +# +# +# except Exception as e: +# +# return {"error": str(e)} +# # Here you can perform your processing on the PDF contents +# # results.append({"filename": file.filename, "size": len(contents)}) +# +# # Append the in-memory file to the files list +# # files.append(UploadFile(pdf_stream, filename="downloaded.pdf")) +# def memory_factory(memory_type): + load_dotenv() class Payload(BaseModel): payload: Dict[str, Any] @app.post("/{memory_type}/add-memory", response_model=dict) @@ -148,16 +141,40 @@ def memory_factory(memory_type): ): try: + logging.info(" Init PDF processing") + decoded_payload = payload.payload - Memory_ = Memory( user_id='555') + if 'pdf_url' in decoded_payload: + pdf_response = requests.get(decoded_payload['pdf_url']) + pdf_content = pdf_response.content - await Memory_.async_init() + logging.info("Downloaded PDF from URL") - memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None) - output= memory_class(observation=decoded_payload['prompt']) - return JSONResponse(content={"response": output}, status_code=200) + # Create an in-memory file-like object for the PDF content + pdf_stream = BytesIO(pdf_content) + + contents = pdf_stream.read() + + tmp_location = os.path.join('/tmp', "tmp.pdf") + with open(tmp_location, 'wb') as tmp_file: + tmp_file.write(contents) + + logging.info("Wrote PDF from URL") + + # Process the PDF using PyPDFLoader + loader = PyPDFLoader(tmp_location) + # pages = loader.load_and_split() + logging.info(" PDF split into pages") + + Memory_ = Memory(user_id='555') + + await Memory_.async_init() + + memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None) + output= await memory_class(observation=str(loader)) + return JSONResponse(content={"response": output}, status_code=200) except Exception as e: diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 1f8b1cdf9..e087cafcd 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -1,4 +1,4 @@ -#Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client +# Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client import dlt from langchain import PromptTemplate, LLMChain @@ -15,7 +15,7 @@ from deep_translator import (GoogleTranslator) from langchain.chat_models import ChatOpenAI from langchain.schema import LLMResult, HumanMessage from langchain.callbacks.base import AsyncCallbackHandler, 
BaseCallbackHandler - +from pydantic import BaseModel, Field, parse_obj_as from langchain.memory import VectorStoreRetrieverMemory from marvin import ai_classifier from enum import Enum @@ -29,14 +29,13 @@ from langchain.tools import tool from langchain.vectorstores import Weaviate import uuid from dotenv import load_dotenv + load_dotenv() from pathlib import Path from langchain import OpenAI, LLMMathChain from langchain.chat_models import ChatOpenAI from langchain.prompts import ChatPromptTemplate - - import os from datetime import datetime @@ -56,12 +55,17 @@ from langchain.schema import Document, SystemMessage, HumanMessage from langchain.vectorstores import Weaviate import weaviate import uuid + load_dotenv() +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") + class MyCustomSyncHandler(BaseCallbackHandler): def on_llm_new_token(self, token: str, **kwargs) -> None: print(f"Sync handler being called in a `thread_pool_executor`: token: {token}") + + class MyCustomAsyncHandler(AsyncCallbackHandler): """Async callback handler that can be used to handle callbacks from langchain.""" @@ -80,14 +84,17 @@ class MyCustomAsyncHandler(AsyncCallbackHandler): await asyncio.sleep(0.3) print("Hi! I just woke up. Your llm is ending") + class VectorDB: OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0)) OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") - def __init__(self, user_id: str, index_name: str, memory_id:str, ltm_memory_id:str='00000', st_memory_id:str='0000', buffer_id:str='0000', db_type: str = "pinecone", namespace:str = None): + + def __init__(self, user_id: str, index_name: str, memory_id: str, ltm_memory_id: str = '00000', + st_memory_id: str = '0000', buffer_id: str = '0000', db_type: str = "pinecone", namespace: str = None): self.user_id = user_id self.index_name = index_name self.db_type = db_type - self.namespace=namespace + self.namespace = namespace self.memory_id = memory_id self.ltm_memory_id = ltm_memory_id self.st_memory_id = st_memory_id @@ -118,7 +125,7 @@ class VectorDB: ) return vectorstore - def init_weaviate_client(self, namespace:str): + def init_weaviate_client(self, namespace: str): embeddings = OpenAIEmbeddings() auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) client = weaviate.Client( @@ -131,7 +138,7 @@ class VectorDB: ) return client - def init_weaviate(self, namespace:str): + def init_weaviate(self, namespace: str): embeddings = OpenAIEmbeddings() auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) client = weaviate.Client( @@ -152,15 +159,12 @@ class VectorDB: ) return retriever - async def add_memories(self, observation: str, page: str = "", source: str = ""): + async def add_memories(self, observation: str, params: dict = None): if self.db_type == "pinecone": # Update Pinecone memories here vectorstore: Pinecone = Pinecone.from_existing_index( index_name=self.index_name, embedding=OpenAIEmbeddings(), namespace=self.namespace ) - - - retriever = vectorstore.as_retriever() retriever.add_documents( [ @@ -170,8 +174,17 @@ class VectorDB: "inserted_at": datetime.now(), "text": observation, "user_id": self.user_id, - "page": page, - "source": source + "version": params.get('version', None) or "", + "agreement_id": params.get('agreement_id', None) or "", + "privacy_policy": params.get('privacy_policy', None) or "", + "terms_of_service": params.get('terms_of_service', None) or "", + "format": params.get('format', None) or "", + "schema_version": params.get('schema_version', None) or "", + 
"checksum": params.get('checksum', None) or "", + "owner": params.get('owner', None) or "", + "license": params.get('license', None) or "", + "validity_start": params.get('validity_start', None) or "", + "validity_end": params.get('validity_end', None) or "" }, namespace=self.namespace, ) @@ -180,10 +193,9 @@ class VectorDB: elif self.db_type == "weaviate": # Update Weaviate memories here print(self.namespace) - retriever = self.init_weaviate( self.namespace) + retriever = self.init_weaviate(self.namespace) - - return retriever.add_documents([ + return retriever.add_documents([ Document( metadata={ "text": observation, @@ -192,18 +204,30 @@ class VectorDB: "ltm_memory_id": str(self.ltm_memory_id), "st_memory_id": str(self.st_memory_id), "buffer_id": str(self.buffer_id), + "version": params.get('version', None) or "", + "agreement_id": params.get('agreement_id', None) or "", + "privacy_policy": params.get('privacy_policy', None) or "", + "terms_of_service": params.get('terms_of_service', None) or "", + "format": params.get('format', None) or "", + "schema_version": params.get('schema_version', None) or "", + "checksum": params.get('checksum', None) or "", + "owner": params.get('owner', None) or "", + "license": params.get('license', None) or "", + "validity_start": params.get('validity_start', None) or "", + "validity_end": params.get('validity_end', None) or "" # **source_metadata, }, page_content=observation, )] ) + # def get_pinecone_vectorstore(self, namespace: str) -> pinecone.VectorStore: # return Pinecone.from_existing_index( # index_name=self.index, embedding=OpenAIEmbeddings(), namespace=namespace # ) - async def fetch_memories(self, observation: str, namespace:str, params = None): + async def fetch_memories(self, observation: str, namespace: str, params: dict = None): if self.db_type == "pinecone": # Fetch Pinecone memories here pass @@ -227,22 +251,57 @@ class VectorDB: print(str(datetime.now())) print(observation) if namespace is None: - namespace= self.namespace + namespace = self.namespace - params_user_id= { - "path": ["user_id"], - "operator": "Like", - "valueText": self.user_id + params_user_id = { + "path": ["user_id"], + "operator": "Like", + "valueText": self.user_id } if params: - query_output = client.query.get(namespace, ["text","user_id", "memory_id", "ltm_memory_id", "st_memory_id", "buffer_id"]).with_where(params).with_additional(['id','creationTimeUnix','lastUpdateTimeUnix']).with_where(params_user_id).do() + query_output = client.query.get(namespace, ["text" + , "user_id" + , "memory_id" + , "ltm_memory_id" + , "st_memory_id" + , "buffer_id" + , "version", + "agreement_id", + "privacy_policy", + "terms_of_service", + "format", + "schema_version", + "checksum", + "owner", + "license", + "validity_start", + "validity_end"]).with_where(params).with_additional( + ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() return query_output else: - query_output = client.query.get(namespace, ["text","user_id", "memory_id", "ltm_memory_id","st_memory_id", "buffer_id"]).with_additional(['id','creationTimeUnix','lastUpdateTimeUnix']).with_where(params_user_id).do() + query_output = client.query.get(namespace, ["text", + "user_id", + "memory_id", + "ltm_memory_id", + "st_memory_id", + "buffer_id", + "version", + "agreement_id", + "privacy_policy", + "terms_of_service", + "format", + "schema_version", + "checksum", + "owner", + "license", + "validity_start", + "validity_end" + ]).with_additional( + ['id', 'creationTimeUnix', 
'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() return query_output - async def delete_memories(self, params:dict = None): + async def delete_memories(self, params: dict = None): client = self.init_weaviate_client(self.namespace) if params: where_filter = { @@ -256,7 +315,7 @@ class VectorDB: where=where_filter, ) else: - #Delete all objects + # Delete all objects return client.batch.delete_objects( class_name=self.namespace, @@ -267,8 +326,9 @@ class VectorDB: } ) - def update_memories(self, observation, namespace:str,params:dict = None): + def update_memories(self, observation, namespace: str, params: dict = None): client = self.init_weaviate_client(self.namespace) + client.data_object.update( data_object={ "text": observation, @@ -277,6 +337,17 @@ class VectorDB: "ltm_memory_id": str(self.ltm_memory_id), "st_memory_id": str(self.st_memory_id), "buffer_id": str(self.buffer_id), + "version": params.get('version', None) or "", + "agreement_id": params.get('agreement_id', None) or "", + "privacy_policy": params.get('privacy_policy', None) or "", + "terms_of_service": params.get('terms_of_service', None) or "", + "format": params.get('format', None) or "", + "schema_version": params.get('schema_version', None) or "", + "checksum": params.get('checksum', None) or "", + "owner": params.get('owner', None) or "", + "license": params.get('license', None) or "", + "validity_start": params.get('validity_start', None) or "", + "validity_end": params.get('validity_end', None) or "" # **source_metadata, @@ -288,33 +359,39 @@ class VectorDB: return - class SemanticMemory: - def __init__(self, user_id: str, memory_id:str, ltm_memory_id:str, index_name: str, db_type:str="weaviate", namespace:str="SEMANTICMEMORY"): + def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate", + namespace: str = "SEMANTICMEMORY"): # Add any semantic memory-related attributes or setup here - self.user_id=user_id + self.user_id = user_id self.index_name = index_name self.namespace = namespace self.semantic_memory_id = str(uuid.uuid4()) self.memory_id = memory_id self.ltm_memory_id = ltm_memory_id - self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace) + self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id, + index_name=index_name, db_type=db_type, namespace=self.namespace) self.db_type = db_type - - - async def _add_memories(self, semantic_memory: str="None") ->list[str]: + async def _add_memories(self, semantic_memory: str = "None", params: dict = None) -> list[str]: """Update semantic memory for the user""" if self.db_type == "weaviate": - out = await self.vector_db.add_memories( observation = semantic_memory) - return out + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=400, + chunk_overlap=20, + length_function=len, + is_separator_regex=False, + ) + texts = text_splitter.create_documents([semantic_memory]) + for text in texts: + out = await self.vector_db.add_memories(observation=text.page_content, params=params) + return out elif self.db_type == "pinecone": pass - - async def _fetch_memories(self, observation: str,params:str=None) -> Coroutine[Any, Any, Any]: + async def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]: """Fetch related characteristics, preferences or dislikes for a user.""" # self.init_pinecone(index_name=self.index) 
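# --- Illustrative usage (not part of the patch) ---
# A minimal sketch of how the chunked _add_memories / _fetch_memories pair introduced
# above might be exercised once this patch is applied. It assumes OPENAI_API_KEY,
# WEAVIATE_API_KEY and the Weaviate URL are configured, that the module is importable
# under its file name, and uses placeholder index/filter values that are not taken
# from the patch itself.
import asyncio

from level_2_pdf_vectorstore__dlt_contracts import Memory


async def demo():
    memory = Memory(user_id="123", index_name="my-agent")
    await memory.async_init()

    # Metadata keys mirror the params dict added to VectorDB.add_memories in this patch.
    params = {"version": "1.0", "schema_version": "1.1", "owner": "John Doe"}
    await memory._add_semantic_memory(
        semantic_memory="Example document text", params=params
    )

    # fetch_memories accepts a Weaviate-style where filter; the user_id filter
    # is applied internally in addition to whatever is passed here.
    where_filter = {"path": ["version"], "operator": "Equal", "valueText": "1.0"}
    results = await memory._fetch_semantic_memory(
        observation="Example document text", params=where_filter
    )
    print(results)


asyncio.run(demo())
# --- end illustrative usage ---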
@@ -325,42 +402,42 @@ class SemanticMemory: elif self.db_type == "pinecone": pass - async def _delete_memories(self,params:str=None) -> Coroutine[Any, Any, Any]: + async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: """Fetch related characteristics, preferences or dislikes for a user.""" # self.init_pinecone(index_name=self.index) if self.db_type == "weaviate": - return await self.vector_db.delete_memories( params=params) + return await self.vector_db.delete_memories(params=params) elif self.db_type == "pinecone": pass + class EpisodicMemory: - def __init__(self, user_id: str, memory_id:str, ltm_memory_id:str, index_name: str, db_type:str="weaviate", namespace:str="EPISODICMEMORY"): + def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate", + namespace: str = "EPISODICMEMORY"): # Add any semantic memory-related attributes or setup here - self.user_id=user_id + self.user_id = user_id self.index_name = index_name self.namespace = namespace self.episodic_memory_id = str(uuid.uuid4()) self.memory_id = memory_id self.ltm_memory_id = ltm_memory_id - self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace) + self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id, + index_name=index_name, db_type=db_type, namespace=self.namespace) self.db_type = db_type - - - async def _add_memories(self ,observation:str=None) -> list[str]: + async def _add_memories(self, observation: str = None, params: dict = None) -> list[str]: """Update semantic memory for the user""" if self.db_type == "weaviate": - return await self.vector_db.add_memories( observation = observation) + return await self.vector_db.add_memories(observation=observation, params=params) elif self.db_type == "pinecone": pass - - def _fetch_memories(self, observation: str,params:str=None) -> Coroutine[Any, Any, Any]: + def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]: """Fetch related characteristics, preferences or dislikes for a user.""" # self.init_pinecone(index_name=self.index) @@ -370,19 +447,22 @@ class EpisodicMemory: elif self.db_type == "pinecone": pass - async def _delete_memories(self, params:str=None) -> Coroutine[Any, Any, Any]: + + async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: """Fetch related characteristics, preferences or dislikes for a user.""" # self.init_pinecone(index_name=self.index) if self.db_type == "weaviate": - return await self.vector_db.delete_memories( params=params) + return await self.vector_db.delete_memories(params=params) elif self.db_type == "pinecone": pass + class LongTermMemory: - def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str=None, db_type:str="weaviate"): + def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, + db_type: str = "weaviate"): self.user_id = user_id self.memory_id = memory_id self.ltm_memory_id = str(uuid.uuid4()) @@ -390,14 +470,17 @@ class LongTermMemory: self.namespace = namespace self.db_type = db_type # self.episodic_memory = EpisodicMemory() - self.semantic_memory = SemanticMemory(user_id = self.user_id, memory_id=self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=self.index_name, db_type=self.db_type) + self.semantic_memory = 
SemanticMemory(user_id=self.user_id, memory_id=self.memory_id, + ltm_memory_id=self.ltm_memory_id, index_name=self.index_name, + db_type=self.db_type) self.episodic_memory = EpisodicMemory(user_id=self.user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id, index_name=self.index_name, db_type=self.db_type) class ShortTermMemory: - def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str=None, db_type:str="weaviate"): + def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, + db_type: str = "weaviate"): # Add any short-term memory-related attributes or setup here self.user_id = user_id self.memory_id = memory_id @@ -405,12 +488,13 @@ class ShortTermMemory: self.db_type = db_type self.stm_memory_id = str(uuid.uuid4()) self.index_name = index_name - self.episodic_buffer = EpisodicBuffer(user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type) - + self.episodic_buffer = EpisodicBuffer(user_id=self.user_id, memory_id=self.memory_id, + index_name=self.index_name, db_type=self.db_type) class EpisodicBuffer: - def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str='EPISODICBUFFER', db_type:str="weaviate"): + def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, + namespace: str = 'EPISODICBUFFER', db_type: str = "weaviate"): # Add any short-term memory-related attributes or setup here self.user_id = user_id self.memory_id = memory_id @@ -418,7 +502,7 @@ class EpisodicBuffer: self.db_type = db_type self.st_memory_id = "blah" self.index_name = index_name - self.llm= ChatOpenAI( + self.llm = ChatOpenAI( temperature=0.0, max_tokens=1200, openai_api_key=os.environ.get('OPENAI_API_KEY'), @@ -426,10 +510,8 @@ class EpisodicBuffer: callbacks=[MyCustomSyncHandler(), MyCustomAsyncHandler()], ) - # self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, st_memory_id = self.st_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace) - def _compute_weights(self, context: str): """Computes the weights for the buffer""" pass @@ -456,57 +538,58 @@ class EpisodicBuffer: # json_data = json.dumps(chain_result) # return json_data - async def _fetch_memories(self, observation: str,namespace:str) -> str: + async def _fetch_memories(self, observation: str, namespace: str) -> str: vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, index_name=self.index_name, db_type=self.db_type, namespace=namespace) - query = await vector_db.fetch_memories(observation=observation) return query - async def _add_memories(self, observation: str,namespace:str): + async def _add_memories(self, observation: str, namespace: str, params: dict = None): vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, index_name=self.index_name, db_type=self.db_type, namespace=namespace) - - query = await vector_db.add_memories(observation) + query = await vector_db.add_memories(observation, params=params) return query - async def _delete_memories(self, params:str=None) -> Coroutine[Any, Any, Any]: + async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: """Fetch related characteristics, preferences or dislikes for a user.""" # self.init_pinecone(index_name=self.index) if self.db_type == "weaviate": - return await self.vector_db.delete_memories( params=params) + return await 
self.vector_db.delete_memories(params=params) elif self.db_type == "pinecone": pass - async def freshness(self, observation: str,namespace:str) -> str: - """Freshness - Score between 1 and 5 on how often was the information processed in episodic memory in the past""" + # async def freshness(self, observation: str,namespace:str) -> str: + # """Freshness - Score between 1 and 5 on how often was the information processed in episodic memory in the past""" + # + # memory = Memory(user_id=self.user_id) + # await memory.async_init() + # + # # gg = await memory._run_buffer(user_input= "bla", content = "blablabla ") + # # print(gg) + # + # + # + # ggur = await memory._fetch_episodic_memory(observation=observation) + # print(ggur) - memory = Memory(user_id=self.user_id) - await memory.async_init() + # @ai_classifier + # class MemoryRoute(Enum): + # """Represents classifer for semantic fetching of memories""" + # + # storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY" + # raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER" + # raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY" + # long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY" + # raw_information_to_store_as_events = "EVENTBUFFER" + # + # namespace= MemoryRoute(observation) - # gg = await memory._run_buffer(user_input= "bla", content = "blablabla ") - # print(gg) - - ggur = await memory._fetch_episodic_memory(observation="bla bla bla") - print(ggur) - # @ai_classifier - # class MemoryRoute(Enum): - # """Represents classifer for semantic fetching of memories""" - # - # storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY" - # raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER" - # raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY" - # long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY" - # raw_information_to_store_as_events = "EVENTBUFFER" - # - # namespace= MemoryRoute(observation) - - return ggur + # return ggur async def encoding(self, document: str, namespace: str = "EPISODICBUFFER") -> None: """Encoding for the buffer, stores raw data in the buffer @@ -522,21 +605,90 @@ class EpisodicBuffer: # Here we define the user prompt and the structure of the output we desire # prompt = output[0].page_content + # file_upload + # if content is not None: - #We need to encode the content. 
Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer - output_translated = GoogleTranslator(source='auto', target='en').translate(text=content) - await self.encoding(output_translated) - freshness_score =await self.freshness(output_translated, namespace="EPISODICBUFFER") - print(freshness_score) + # operations -> translate, structure, load to db + + list_of_operations = ["translate", "structure", "load to db"] + + prompt_filter = ChatPromptTemplate.from_template( + "Filter and remove uneccessary information that is not relevant in the user query {query}") + chain_filter = prompt_filter | self.llm + output = await chain_filter.ainvoke({"query": user_input}) + + class Task(BaseModel): + """Schema for an individual task.""" + task_order: str = Field(..., description="The order at which the task needs to be performed") + task_name: str = Field(None, description="The task that needs to be performed") + operation: str = Field(None, description="The operation to be performed") + + class TaskList(BaseModel): + """Schema for the record containing a list of tasks.""" + tasks: List[Task] = Field(..., description="List of tasks") + + prompt_filter_chunk = f" Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" + # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) + # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations}) + prompt_msgs = [ + SystemMessage( + content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones" + ), + HumanMessage(content="Decompose based on the following prompt:"), + HumanMessagePromptTemplate.from_template("{input}"), + HumanMessage(content="Tips: Make sure to answer in the correct format"), + HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others") + + ] + prompt_ = ChatPromptTemplate(messages=prompt_msgs) + chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True) + from langchain.callbacks import get_openai_callback + with get_openai_callback() as cb: + output = await chain.arun(input=prompt_filter_chunk, verbose=True) + print(cb) + # output = json.dumps(output) + my_object = parse_obj_as(TaskList, output) + print("HERE IS THE OUTPUT", my_object.json()) + + data = json.loads(my_object.json()) + + # Extract the list of tasks + tasks_list = data["tasks"] + + for task in tasks_list: + class TranslateText(BaseModel): + observation: str = Field( + description="observation we want to translate" + ) + + @tool("translate_to_en", args_schema=TranslateText, return_direct=True) + def translate_to_en(observation, args_schema=TranslateText): + """Translate to English""" + out = GoogleTranslator(source='auto', target='en').translate(text=observation) + return out + + agent = initialize_agent( + llm=self.llm, + tools=[translate_to_en], + agent=AgentType.OPENAI_FUNCTIONS, + + verbose=True, + ) + + agent.run(task) + + # We need to encode the content. 
Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer + # output_translated = GoogleTranslator(source='auto', target='en').translate(text=content) + # await self.encoding(output_translated) + # freshness_score =await self.freshness(output_translated, namespace="EPISODICBUFFER") + # print(freshness_score) # shows how much the data is relevant for the user, provided by the user in a separate step, starts at 0 - user_relevance_score ="0" + user_relevance_score = "0" # similarity score between the user input and the content already available in the buffer - - - + # write this to episodic memory # prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}") # chain_filter = prompt_filter | self.llm @@ -546,12 +698,12 @@ class EpisodicBuffer: if content is None: # Sensory and Linguistic Processing - prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}") + prompt_filter = ChatPromptTemplate.from_template( + "Filter and remove uneccessary information that is not relevant in the user query {query}") chain_filter = prompt_filter | self.llm output = await chain_filter.ainvoke({"query": user_input}) translation = GoogleTranslator(source='auto', target='en').translate(text=output.content) - def top_down_processing(): """Top-down processing""" pass @@ -560,29 +712,19 @@ class EpisodicBuffer: """Bottom-up processing""" pass - def interactive_processing(): """interactive processing""" pass + working_memory_activation = "bla" - - - - working_memory_activation = "bla" - - - prompt_chunk = ChatPromptTemplate.from_template("Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. Be brief") + prompt_chunk = ChatPromptTemplate.from_template( + "Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. Be brief") chain_chunk = prompt_chunk | self.llm output_chunks = await chain_chunk.ainvoke({"query": output.content}) - print(output_chunks.content) - - - - # vectorstore = Weaviate.from_documents(documents, embeddings, client=client, by_text=False) # retriever = WeaviateHybridSearchRetriever( # client=client, @@ -598,7 +740,6 @@ class EpisodicBuffer: # query = vector_db. 
- # retriever = vectorstore.as_retriever(search_kwargs=dict(k=1)) # memory = VectorStoreRetrieverMemory(retriever=retriever) # class PromptWrapper(BaseModel): @@ -728,10 +869,8 @@ class EpisodicBuffer: # return output - - -#DEFINE STM -#DEFINE LTM +# DEFINE STM +# DEFINE LTM class Memory: load_dotenv() @@ -739,7 +878,7 @@ class Memory: OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") def __init__(self, user_id: str = "676", index_name: str = None, knowledge_source: str = None, - knowledge_type: str = None, db_type:str="weaviate", namespace:str=None) -> None: + knowledge_type: str = None, db_type: str = "weaviate", namespace: str = None) -> None: self.user_id = user_id self.index_name = index_name self.db_type = db_type @@ -752,12 +891,13 @@ class Memory: load_dotenv() # Asynchronous factory function for creating LongTermMemory - async def async_create_long_term_memory(self,user_id, memory_id, index_name, namespace, db_type): + async def async_create_long_term_memory(self, user_id, memory_id, index_name, namespace, db_type): # Perform asynchronous initialization steps if needed return LongTermMemory( user_id=user_id, memory_id=memory_id, index_name=index_name, namespace=namespace, db_type=db_type ) + async def async_init(self): # Asynchronous initialization of LongTermMemory and ShortTermMemory self.long_term_memory = await self.async_create_long_term_memory( @@ -785,9 +925,9 @@ class Memory: # user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type # ) - async def _add_semantic_memory(self, semantic_memory:str): + async def _add_semantic_memory(self, semantic_memory: str, params: dict = None): return await self.long_term_memory.semantic_memory._add_memories( - semantic_memory=semantic_memory + semantic_memory=semantic_memory, params=params ) @@ -795,67 +935,79 @@ class Memory: return await self.long_term_memory.semantic_memory._fetch_memories( observation=observation, params=params - - ) - async def _delete_semantic_memory(self, params:str=None): + + async def _delete_semantic_memory(self, params: str = None): return await self.long_term_memory.semantic_memory._delete_memories( params=params ) - async def _add_episodic_memory(self, observation:str): + async def _add_episodic_memory(self, observation: str, params: dict = None): return await self.long_term_memory.episodic_memory._add_memories( - observation=observation + observation=observation, params=params ) - async def _fetch_episodic_memory(self, observation, params:str=None): + async def _fetch_episodic_memory(self, observation, params: str = None): return await self.long_term_memory.episodic_memory._fetch_memories( observation=observation, params=params ) - async def _delete_episodic_memory(self, params:str=None): + async def _delete_episodic_memory(self, params: str = None): return await self.long_term_memory.episodic_memory._delete_memories( - params=params + params=params ) - async def _run_buffer(self, user_input:str, content:str=None): + async def _run_buffer(self, user_input: str, content: str = None): return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input, content=content) + async def _add_buffer_memory(self, user_input: str, namespace: str = None, params: dict = None): + return await self.short_term_memory.episodic_buffer._add_memories(observation=user_input, namespace=namespace, + params=params) - - async def _add_buffer_memory(self, user_input: str, namespace: str = None ): - return await 
self.short_term_memory.episodic_buffer._add_memories(observation=user_input, namespace=namespace) - - async def _fetch_buffer_memory(self, user_input: str, namespace: str = None ): + async def _fetch_buffer_memory(self, user_input: str, namespace: str = None): return await self.short_term_memory.episodic_buffer._fetch_memories(observation=user_input, namespace=namespace) - async def _delete_buffer_memory(self, params:str=None): + async def _delete_buffer_memory(self, params: str = None): return await self.long_term_memory.episodic_buffer._delete_memories( - params=params + params=params ) - async def main(): memory = Memory(user_id="123") await memory.async_init() + params = { + "version": "1.0", + "agreement_id": "AG123456", + "privacy_policy": "https://example.com/privacy", + "terms_of_service": "https://example.com/terms", + "format": "json", + "schema_version": "1.1", + "checksum": "a1b2c3d4e5f6", + "owner": "John Doe", + "license": "MIT", + "validity_start": "2023-08-01", + "validity_end": "2024-07-31" + } - # gg = await memory._run_buffer(user_input= "bla", content = "blablabla ") - # print(gg) - - gg = await memory._delete_episodic_memory() + gg = await memory._run_buffer(user_input="i NEED TRANSLATION TO GERMAN ", content="i NEED TRANSLATION TO GERMAN ") print(gg) - # ggur = await memory._add_episodic_memory(observation = "bla bla bla") + # gg = await memory._delete_episodic_memory() + # print(gg) + + # ggur = await memory._add_episodic_memory(observation = "bla bla bla", params=params) # print(ggur) # ggur = await memory._fetch_episodic_memory(observation = "bla bla bla") # print(ggur) # fff = await memory._fetch_memories_buffer(user_input = "bla bla bla", namespace="Test") # print(fff) + if __name__ == "__main__": import asyncio + asyncio.run(main()) # bb = agent._update_semantic_memory(semantic_memory="Users core summary") From 020570f57f899cad66d746c0fe56698afa8f5df1 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 23 Aug 2023 16:25:15 +0200 Subject: [PATCH 02/12] Added fix --- level_2/api.py | 35 +- .../level_2_pdf_vectorstore__dlt_contracts.py | 523 +++++++++++++----- level_2/poetry.lock | 40 +- level_2/pyproject.toml | 1 + 4 files changed, 424 insertions(+), 175 deletions(-) diff --git a/level_2/api.py b/level_2/api.py index e79309de4..57942d582 100644 --- a/level_2/api.py +++ b/level_2/api.py @@ -168,12 +168,12 @@ def memory_factory(memory_type): # pages = loader.load_and_split() logging.info(" PDF split into pages") - Memory_ = Memory(user_id='555') + Memory_ = Memory(user_id=decoded_payload['user_id']) await Memory_.async_init() memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None) - output= await memory_class(observation=str(loader)) + output= await memory_class(observation=str(loader), params =decoded_payload['params']) return JSONResponse(content={"response": output}, status_code=200) except Exception as e: @@ -181,7 +181,7 @@ def memory_factory(memory_type): return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) @app.post("/{memory_type}/fetch-memory", response_model=dict) - async def add_memory( + async def fetch_memory( payload: Payload, # files: List[UploadFile] = File(...), ): @@ -189,7 +189,7 @@ def memory_factory(memory_type): decoded_payload = payload.payload - Memory_ = Memory(user_id='555') + Memory_ = Memory(user_id=decoded_payload['user_id']) await Memory_.async_init() @@ -202,7 +202,7 @@ def memory_factory(memory_type): return JSONResponse(content={"response": {"error": 
str(e)}}, status_code=503) @app.post("/{memory_type}/delete-memory", response_model=dict) - async def add_memory( + async def delete_memory( payload: Payload, # files: List[UploadFile] = File(...), ): @@ -210,7 +210,7 @@ def memory_factory(memory_type): decoded_payload = payload.payload - Memory_ = Memory(user_id='555') + Memory_ = Memory(user_id=decoded_payload['user_id']) await Memory_.async_init() @@ -227,6 +227,29 @@ for memory_type in memory_list: memory_factory(memory_type) + +@app.get("/available-buffer-actions", response_model=dict) +async def available_buffer_actions( + payload: Payload, + # files: List[UploadFile] = File(...), +): + try: + + decoded_payload = payload.payload + + Memory_ = Memory(user_id=decoded_payload['user_id']) + + await Memory_.async_init() + + # memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None) + output = Memory_._available_operations() + return JSONResponse(content={"response": output}, status_code=200) + + except Exception as e: + + return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) + + # # # Process each uploaded PDF file # results = [] diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index e087cafcd..8212db028 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -55,7 +55,7 @@ from langchain.schema import Document, SystemMessage, HumanMessage from langchain.vectorstores import Weaviate import weaviate import uuid - +import humanize load_dotenv() OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") @@ -542,7 +542,7 @@ class EpisodicBuffer: vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, index_name=self.index_name, db_type=self.db_type, namespace=namespace) - query = await vector_db.fetch_memories(observation=observation) + query = await vector_db.fetch_memories(observation=observation, namespace=namespace) return query async def _add_memories(self, observation: str, namespace: str, params: dict = None): @@ -555,175 +555,380 @@ class EpisodicBuffer: async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: """Fetch related characteristics, preferences or dislikes for a user.""" # self.init_pinecone(index_name=self.index) + vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, + index_name=self.index_name, db_type=self.db_type, namespace=self.namespace) if self.db_type == "weaviate": - return await self.vector_db.delete_memories(params=params) + return await vector_db.delete_memories(params=params) elif self.db_type == "pinecone": pass - # async def freshness(self, observation: str,namespace:str) -> str: - # """Freshness - Score between 1 and 5 on how often was the information processed in episodic memory in the past""" - # - # memory = Memory(user_id=self.user_id) - # await memory.async_init() - # - # # gg = await memory._run_buffer(user_input= "bla", content = "blablabla ") - # # print(gg) - # - # - # - # ggur = await memory._fetch_episodic_memory(observation=observation) - # print(ggur) + async def freshness(self, observation: str,namespace:str=None) -> list[str]: + """Freshness - Score between 1 and 5 on how often was the information updated in episodic or semantic memory in the past""" + + memory = Memory(user_id=self.user_id) + await memory.async_init() + + lookup_value = await memory._fetch_episodic_memory(observation = observation) + unix_t = 
lookup_value["data"]["Get"]["EPISODICMEMORY"][0]["_additional"]["lastUpdateTimeUnix"] + + # Convert Unix timestamp to datetime + last_update_datetime = datetime.fromtimestamp(int(unix_t) / 1000) + time_difference = datetime.now() - last_update_datetime + time_difference_text = humanize.naturaltime(time_difference) + marvin.settings.openai.api_key = os.environ.get('OPENAI_API_KEY') + @ai_classifier + class MemoryRoute(Enum): + """Represents classifer for freshness of memories""" + + data_uploaded_now = "0" + data_uploaded_very_recently = "1" + data_uploaded_recently = "2" + data_uploaded_more_than_a_month_ago = "3" + data_uploaded_more_than_three_months_ago = "4" + data_uploaded_more_than_six_months_ago = "5" + + namespace = MemoryRoute(str(time_difference_text)) + return [namespace.value, lookup_value] + + + async def frequency(self, observation: str,namespace:str) -> list[str]: + """Frequency - Score between 1 and 5 on how often was the information processed in episodic memory in the past + Counts the number of times a memory was accessed in the past and divides it by the total number of memories in the episodic memory """ + client = self.init_weaviate_client(self.namespace) + + memory = Memory(user_id=self.user_id) + await memory.async_init() + + result_output = await memory._fetch_episodic_memory(observation=observation) + number_of_relevant_events = len(result_output["data"]["Get"]["EPISODICMEMORY"]) + number_of_total_events = client.query.aggregate( self.namespace).with_meta_count().do() + frequency = float(number_of_relevant_events) / float(number_of_total_events) + return [str(frequency), result_output["data"]["Get"]["EPISODICMEMORY"][0]] + + + + async def relevance(self, observation: str) -> list[str]: + """Relevance - Score between 1 and 5 on how often was the final information relevant to the user in the past. + Stored in the episodic memory, mainly to show how well a buffer did the job + Starts at 1, gets updated based on the user feedback """ + + return ["5", "memory"] + + async def saliency(self, observation: str) -> list[str]: + """Determines saliency by finding relevance between user input and document schema values. 
+ After finding document schena value relevant for the user, it forms a new query based on the schema value and the user input """ + + return ["5", "memory"] # @ai_classifier # class MemoryRoute(Enum): - # """Represents classifer for semantic fetching of memories""" + # """Represents classifer for freshness of memories""" # - # storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY" - # raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER" - # raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY" - # long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY" - # raw_information_to_store_as_events = "EVENTBUFFER" + # data_uploaded_now = "0" + # data_uploaded_very_recently = "1" + # data_uploaded_recently = "2" + # data_uploaded_more_than_a_month_ago = "3" + # data_uploaded_more_than_three_months_ago = "4" + # data_uploaded_more_than_six_months_ago = "5" # # namespace= MemoryRoute(observation) # return ggur - async def encoding(self, document: str, namespace: str = "EPISODICBUFFER") -> None: + async def encoding(self, document: str, namespace: str = "EPISODICBUFFER", params:dict=None) -> list[str]: """Encoding for the buffer, stores raw data in the buffer Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer""" vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, index_name=self.index_name, db_type=self.db_type, namespace=namespace) - query = await vector_db.add_memories(document) + query = await vector_db.add_memories(document, params=params) return query - async def main_buffer(self, user_input=None, content=None): - """AI buffer to convert unstructured data to structured data""" - # Here we define the user prompt and the structure of the output we desire - # prompt = output[0].page_content + async def available_operations(self) -> list[str]: + """Determines what operations are available for the user to process PDFs""" + return ["translate", "structure", "load to database", "load to semantic memory", "load to episodic memory", "load to buffer"] + + async def main_buffer(self, user_input=None, content=None, params=None): + + """AI buffer to understand user PDF query, prioritize memory info and process it based on available operations""" + + + list_of_operations = await self.available_operations() + + + #we just filter the data here + prompt_filter = ChatPromptTemplate.from_template( + "Filter and remove uneccessary information that is not relevant in the user query {query}") + chain_filter = prompt_filter | self.llm + output = await chain_filter.ainvoke({"query": user_input}) + + + if params: + context =[] + + if "freshness" in params: + params.get('freshness', None) # get the value of freshness + freshness = await self.freshness(observation=str(output)) + context.append(freshness) + + elif "frequency" in params: + params.get('freshness', None) + frequency = await self.freshness(observation=str(output)) + print("freshness", frequency) + context.append(frequency) + + #fix this so it actually filters + + + else: + #defaults to semantic search + memory = Memory(user_id=self.user_id) + await memory.async_init() + + lookup_value_episodic = await memory._fetch_episodic_memory(observation=str(output)) + lookup_value_semantic = await memory._fetch_episodic_memory(observation=str(output)) + lookup_value_buffer = await self._fetch_memories(observation=str(output), namespace=self.namespace) + + context = [lookup_value_episodic, lookup_value_semantic, 
lookup_value_buffer] + #copy the context over into the buffer + #do i need to do it for the episodic + raw data, might make sense + + + + print("HERE WE ARE") + + class Task(BaseModel): + """Schema for an individual task.""" + task_order: str = Field(..., description="The order at which the task needs to be performed") + task_name: str = Field(None, description="The task that needs to be performed") + operation: str = Field(None, description="The operation to be performed") + + class TaskList(BaseModel): + """Schema for the record containing a list of tasks.""" + tasks: List[Task] = Field(..., description="List of tasks") + + prompt_filter_chunk = f" Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" + # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) + # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations}) + prompt_msgs = [ + SystemMessage( + content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones" + ), + HumanMessage(content="Decompose based on the following prompt:"), + HumanMessagePromptTemplate.from_template("{input}"), + HumanMessage(content="Tips: Make sure to answer in the correct format"), + HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others") + + ] + prompt_ = ChatPromptTemplate(messages=prompt_msgs) + chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True) + from langchain.callbacks import get_openai_callback + with get_openai_callback() as cb: + output = await chain.arun(input=prompt_filter_chunk, verbose=True) + print(cb) + # output = json.dumps(output) + my_object = parse_obj_as(TaskList, output) + print("HERE IS THE OUTPUT", my_object.json()) + + data = json.loads(my_object.json()) + + # Extract the list of tasks + tasks_list = data["tasks"] + + for task in tasks_list: + class TranslateText(BaseModel): + observation: str = Field( + description="observation we want to translate" + ) + + @tool("translate_to_en", args_schema=TranslateText, return_direct=True) + def translate_to_en(observation, args_schema=TranslateText): + """Translate to English""" + out = GoogleTranslator(source='auto', target='en').translate(text=observation) + return out + + agent = initialize_agent( + llm=self.llm, + tools=[translate_to_en], + agent=AgentType.OPENAI_FUNCTIONS, + + verbose=True, + ) + print("HERE IS THE TASK", task) + output = agent.run(input=task) + print(output) + await self.encoding(output) + + + + buffer_result = await self._fetch_memories(observation=str(output), namespace=self.namespace) + + #json here + + prompt_filter = ChatPromptTemplate.from_template( + "Format and collect all outputs from the tasks presented here {tasks} and their results {results}") + chain_filter_chunk = prompt_filter | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) + output = await chain_filter_chunk.ainvoke({"query": buffer_result}) + print("HERE IS THE OUTPUT", output) + + + memory = Memory(user_id=self.user_id) + await memory.async_init() + + lookup_value = await memory._add_episodic_memory(observation=str(output), params={}) + + + #load to buffer once is done + + #fetch everything in the current session and load to episodic memory + + + + + + + + #for files where user input is provided and they are directly proccessed + + + # + # based on the 
semantic search , raw memory data will be fetched. also, episodic memory will be fetched + # they will be written down as a "context" for the user + + + # i get scores for the episodic memory and the semantic memory + # i get only the data with the highest score + # i use that data to form the context # file_upload # - if content is not None: - # operations -> translate, structure, load to db + #for files where user input is provided and they are directly proccessed - list_of_operations = ["translate", "structure", "load to db"] - prompt_filter = ChatPromptTemplate.from_template( - "Filter and remove uneccessary information that is not relevant in the user query {query}") - chain_filter = prompt_filter | self.llm - output = await chain_filter.ainvoke({"query": user_input}) - - class Task(BaseModel): - """Schema for an individual task.""" - task_order: str = Field(..., description="The order at which the task needs to be performed") - task_name: str = Field(None, description="The task that needs to be performed") - operation: str = Field(None, description="The operation to be performed") - - class TaskList(BaseModel): - """Schema for the record containing a list of tasks.""" - tasks: List[Task] = Field(..., description="List of tasks") - - prompt_filter_chunk = f" Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" - # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) - # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations}) - prompt_msgs = [ - SystemMessage( - content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones" - ), - HumanMessage(content="Decompose based on the following prompt:"), - HumanMessagePromptTemplate.from_template("{input}"), - HumanMessage(content="Tips: Make sure to answer in the correct format"), - HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others") - - ] - prompt_ = ChatPromptTemplate(messages=prompt_msgs) - chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True) - from langchain.callbacks import get_openai_callback - with get_openai_callback() as cb: - output = await chain.arun(input=prompt_filter_chunk, verbose=True) - print(cb) - # output = json.dumps(output) - my_object = parse_obj_as(TaskList, output) - print("HERE IS THE OUTPUT", my_object.json()) - - data = json.loads(my_object.json()) - - # Extract the list of tasks - tasks_list = data["tasks"] - - for task in tasks_list: - class TranslateText(BaseModel): - observation: str = Field( - description="observation we want to translate" - ) - - @tool("translate_to_en", args_schema=TranslateText, return_direct=True) - def translate_to_en(observation, args_schema=TranslateText): - """Translate to English""" - out = GoogleTranslator(source='auto', target='en').translate(text=observation) - return out - - agent = initialize_agent( - llm=self.llm, - tools=[translate_to_en], - agent=AgentType.OPENAI_FUNCTIONS, - - verbose=True, - ) - - agent.run(task) - - # We need to encode the content. 
Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer - # output_translated = GoogleTranslator(source='auto', target='en').translate(text=content) - # await self.encoding(output_translated) - # freshness_score =await self.freshness(output_translated, namespace="EPISODICBUFFER") - # print(freshness_score) - # shows how much the data is relevant for the user, provided by the user in a separate step, starts at 0 - user_relevance_score = "0" - # similarity score between the user input and the content already available in the buffer - - # write this to episodic memory - - # prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}") - # chain_filter = prompt_filter | self.llm - # output = await chain_filter.ainvoke({"query": user_input}) - - # print(output) - - if content is None: - # Sensory and Linguistic Processing - prompt_filter = ChatPromptTemplate.from_template( - "Filter and remove uneccessary information that is not relevant in the user query {query}") - chain_filter = prompt_filter | self.llm - output = await chain_filter.ainvoke({"query": user_input}) - translation = GoogleTranslator(source='auto', target='en').translate(text=output.content) - - def top_down_processing(): - """Top-down processing""" - pass - - def bottom_up_processing(): - """Bottom-up processing""" - pass - - def interactive_processing(): - """interactive processing""" - pass - - working_memory_activation = "bla" - - prompt_chunk = ChatPromptTemplate.from_template( - "Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. Be brief") - chain_chunk = prompt_chunk | self.llm - output_chunks = await chain_chunk.ainvoke({"query": output.content}) - - print(output_chunks.content) + # + # # + # + # if content is not None: + # + # # operations -> translate, structure, load to db + # + # list_of_operations = ["translate", "structure", "load to db"] + # + # prompt_filter = ChatPromptTemplate.from_template( + # "Filter and remove uneccessary information that is not relevant in the user query {query}") + # chain_filter = prompt_filter | self.llm + # output = await chain_filter.ainvoke({"query": user_input}) + # + # class Task(BaseModel): + # """Schema for an individual task.""" + # task_order: str = Field(..., description="The order at which the task needs to be performed") + # task_name: str = Field(None, description="The task that needs to be performed") + # operation: str = Field(None, description="The operation to be performed") + # + # class TaskList(BaseModel): + # """Schema for the record containing a list of tasks.""" + # tasks: List[Task] = Field(..., description="List of tasks") + # + # prompt_filter_chunk = f" Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" + # # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) + # # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations}) + # prompt_msgs = [ + # SystemMessage( + # content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones" + # ), + # HumanMessage(content="Decompose based on the following prompt:"), + # HumanMessagePromptTemplate.from_template("{input}"), + # HumanMessage(content="Tips: Make 
sure to answer in the correct format"), + # HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others") + # + # ] + # prompt_ = ChatPromptTemplate(messages=prompt_msgs) + # chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True) + # from langchain.callbacks import get_openai_callback + # with get_openai_callback() as cb: + # output = await chain.arun(input=prompt_filter_chunk, verbose=True) + # print(cb) + # # output = json.dumps(output) + # my_object = parse_obj_as(TaskList, output) + # print("HERE IS THE OUTPUT", my_object.json()) + # + # data = json.loads(my_object.json()) + # + # # Extract the list of tasks + # tasks_list = data["tasks"] + # + # for task in tasks_list: + # class TranslateText(BaseModel): + # observation: str = Field( + # description="observation we want to translate" + # ) + # + # @tool("translate_to_en", args_schema=TranslateText, return_direct=True) + # def translate_to_en(observation, args_schema=TranslateText): + # """Translate to English""" + # out = GoogleTranslator(source='auto', target='en').translate(text=observation) + # return out + # + # agent = initialize_agent( + # llm=self.llm, + # tools=[translate_to_en], + # agent=AgentType.OPENAI_FUNCTIONS, + # + # verbose=True, + # ) + # + # agent.run(task) + # + # # We need to encode the content. Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer + # # output_translated = GoogleTranslator(source='auto', target='en').translate(text=content) + # # await self.encoding(output_translated) + # # freshness_score =await self.freshness(output_translated, namespace="EPISODICBUFFER") + # # print(freshness_score) + # # shows how much the data is relevant for the user, provided by the user in a separate step, starts at 0 + # user_relevance_score = "0" + # # similarity score between the user input and the content already available in the buffer + # + # # write this to episodic memory + # + # # prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}") + # # chain_filter = prompt_filter | self.llm + # # output = await chain_filter.ainvoke({"query": user_input}) + # + # # print(output) + # + # if content is None: + # # Sensory and Linguistic Processing + # prompt_filter = ChatPromptTemplate.from_template( + # "Filter and remove uneccessary information that is not relevant in the user query {query}") + # chain_filter = prompt_filter | self.llm + # output = await chain_filter.ainvoke({"query": user_input}) + # translation = GoogleTranslator(source='auto', target='en').translate(text=output.content) + # + # def top_down_processing(): + # """Top-down processing""" + # pass + # + # def bottom_up_processing(): + # """Bottom-up processing""" + # pass + # + # def interactive_processing(): + # """interactive processing""" + # pass + # + # working_memory_activation = "bla" + # + # prompt_chunk = ChatPromptTemplate.from_template( + # "Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. 
Be brief") + # chain_chunk = prompt_chunk | self.llm + # output_chunks = await chain_chunk.ainvoke({"query": output.content}) + # + # print(output_chunks.content) # vectorstore = Weaviate.from_documents(documents, embeddings, client=client, by_text=False) # retriever = WeaviateHybridSearchRetriever( @@ -969,10 +1174,11 @@ class Memory: return await self.short_term_memory.episodic_buffer._fetch_memories(observation=user_input, namespace=namespace) async def _delete_buffer_memory(self, params: str = None): - return await self.long_term_memory.episodic_buffer._delete_memories( + return await self.short_term_memory.episodic_buffer._delete_memories( params=params ) - + async def _available_operations(self): + return await self.long_term_memory.episodic_buffer._available_operations() async def main(): memory = Memory(user_id="123") @@ -994,15 +1200,34 @@ async def main(): gg = await memory._run_buffer(user_input="i NEED TRANSLATION TO GERMAN ", content="i NEED TRANSLATION TO GERMAN ") print(gg) - # gg = await memory._delete_episodic_memory() + # gg = await memory._delete_buffer_memory() # print(gg) - # ggur = await memory._add_episodic_memory(observation = "bla bla bla", params=params) + episodic = """{ + "start_date": "2023-08-23", + "end_date": "2023-08-30", + "user_query": "How can I plan a healthy diet?", + "action_steps": [ + { + "step_number": 1, + "description": "Research and gather information about basic principles of a healthy diet." + }, + { + "step_number": 2, + "description": "Create a weekly meal plan that includes a variety of nutritious foods." + }, + { + "step_number": 3, + "description": "Prepare and cook meals according to your meal plan. Include fruits, vegetables, lean proteins, and whole grains." + } + ] + }""" + # + # ggur = await memory._add_episodic_memory(observation = episodic, params=params) # print(ggur) - # ggur = await memory._fetch_episodic_memory(observation = "bla bla bla") - # print(ggur) - # fff = await memory._fetch_memories_buffer(user_input = "bla bla bla", namespace="Test") - # print(fff) + + # fff = await memory._fetch_episodic_memory(observation = "healthy diet") + # print(len(fff["data"]["Get"]["EPISODICMEMORY"])) if __name__ == "__main__": diff --git a/level_2/poetry.lock b/level_2/poetry.lock index 7b95c721a..cdfb4ca4a 100644 --- a/level_2/poetry.lock +++ b/level_2/poetry.lock @@ -261,17 +261,17 @@ numpy = {version = ">=1.19.0", markers = "python_version >= \"3.9\""} [[package]] name = "boto3" -version = "1.28.30" +version = "1.28.32" description = "The AWS SDK for Python" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.28.30-py3-none-any.whl", hash = "sha256:e095ede98d3680e65966ab71f273b7d86938f5d853773ef96f4cb646277c2a4b"}, - {file = "boto3-1.28.30.tar.gz", hash = "sha256:2b509a959966a572f15db5768a18066ce1f53022ac53fca9421c620219fa3998"}, + {file = "boto3-1.28.32-py3-none-any.whl", hash = "sha256:ed787f250ce2562c7744395bdf32b5a7bc9184126ef50a75e97bcb66043dccf3"}, + {file = "boto3-1.28.32.tar.gz", hash = "sha256:b505faa126db84e226f6f8d242a798fae30a725f0cac8a76c6aca9ace4e8eb28"}, ] [package.dependencies] -botocore = ">=1.31.30,<1.32.0" +botocore = ">=1.31.32,<1.32.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -280,13 +280,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.31.30" +version = "1.31.32" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.31.30-py3-none-any.whl", hash = "sha256:269f20dcadd8dfd0c26d0e6fbceb84814ff6638ff3aafcc5324b9fb9949a7051"}, - {file = "botocore-1.31.30.tar.gz", hash = "sha256:3cf6a9d7621b897c9ff23cd02113826141b3dd3d7e90273b661efc4dc05f84e2"}, + {file = "botocore-1.31.32-py3-none-any.whl", hash = "sha256:8992ac186988c4b4cc168e8e479e9472da1442b193c1bf7c9dcd1877ec62d23c"}, + {file = "botocore-1.31.32.tar.gz", hash = "sha256:7a07d8dc8cc47bf23af39409ada81f388eb78233e1bb2cde0c415756da753664"}, ] [package.dependencies] @@ -1053,13 +1053,13 @@ requests = ">=2.20.0,<3.0" [[package]] name = "gptcache" -version = "0.1.39.1" +version = "0.1.40" description = "GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat applications that rely on the LLM service. GPTCache works as a memcache for AIGC applications, similar to how Redis works for traditional applications." optional = false python-versions = ">=3.8.1" files = [ - {file = "gptcache-0.1.39.1-py3-none-any.whl", hash = "sha256:81355f7878e12a820dccb017f8a45ea44b73178dac07108c56db664a476a4a07"}, - {file = "gptcache-0.1.39.1.tar.gz", hash = "sha256:a9c629fdeaa94b78a6cfe707a5f9a3a52b361655a3f01327709ca00c78a500eb"}, + {file = "gptcache-0.1.40-py3-none-any.whl", hash = "sha256:ba323e5e46b100fa7663b5f4d164cc2aee60f343184ed03ec2d2bb95e9f47c50"}, + {file = "gptcache-0.1.40.tar.gz", hash = "sha256:5fe4bcf3a45946177cb845b3e1ec01159f10622600e1384b9de0c7c6065d10d5"}, ] [package.dependencies] @@ -1415,13 +1415,13 @@ data = ["language-data (>=1.1,<2.0)"] [[package]] name = "langsmith" -version = "0.0.25" +version = "0.0.26" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.25-py3-none-any.whl", hash = "sha256:d595435ad21fa6077550d7c85472935d1e8241afa042c1e29287d2c95c3ed151"}, - {file = "langsmith-0.0.25.tar.gz", hash = "sha256:e728c398fc1adaa0ed8abeb21f6a92d7fb19fe3ab49d3911c22b03dfe25935d6"}, + {file = "langsmith-0.0.26-py3-none-any.whl", hash = "sha256:61c1d4582104d96edde04e1eea1dae347645b691c44489a5871341a2a1a2a1eb"}, + {file = "langsmith-0.0.26.tar.gz", hash = "sha256:80a4ef1b663a24a460d25b9986ab2010c5d06b6061c65be473abafc0647d191a"}, ] [package.dependencies] @@ -3561,13 +3561,13 @@ colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\" and python [[package]] name = "weaviate-client" -version = "3.22.1" +version = "3.23.0" description = "A python native Weaviate client" optional = false python-versions = ">=3.8" files = [ - {file = "weaviate-client-3.22.1.tar.gz", hash = "sha256:aff61bd3f5d74df20a62328443e3aa9c860d5330fdfb19c4d8ddc44cb604032f"}, - {file = "weaviate_client-3.22.1-py3-none-any.whl", hash = "sha256:01843a4899a227300e570409e77628e9d1b28476313f94943c37aee3f75112e1"}, + {file = "weaviate-client-3.23.0.tar.gz", hash = "sha256:3ffd7f1460c9e32755d84d4f5fc63dfc0bd990dbe2c3dc20d5c68119d467680e"}, + {file = "weaviate_client-3.23.0-py3-none-any.whl", hash = "sha256:3d3bb75c1d96b2b71e213c5eb885ae3e3f42e4304955383c467d100187d9ff8e"}, ] [package.dependencies] @@ -3581,13 +3581,13 @@ grpc = ["grpcio", "grpcio-tools"] [[package]] name = "wheel" -version = "0.41.1" +version = "0.41.2" description = "A built-package format for Python" optional = false python-versions = ">=3.7" files = [ - {file = "wheel-0.41.1-py3-none-any.whl", hash = "sha256:473219bd4cbedc62cea0cb309089b593e47c15c4a2531015f94e4e3b9a0f6981"}, - {file = 
"wheel-0.41.1.tar.gz", hash = "sha256:12b911f083e876e10c595779709f8a88a59f45aacc646492a67fe9ef796c1b47"}, + {file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"}, + {file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"}, ] [package.extras] @@ -3795,4 +3795,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "13258c777467d93ab73021225322da670e42513cedec6252a40aacf74822ea68" +content-hash = "788ca51ba313eac5f1dbcadfd7f91109b8b7a7734a1f16e10c8fb2e3b435a606" diff --git a/level_2/pyproject.toml b/level_2/pyproject.toml index c8b2c95ee..461255af9 100644 --- a/level_2/pyproject.toml +++ b/level_2/pyproject.toml @@ -39,6 +39,7 @@ dlt = { version ="^0.3.8", extras = ["duckdb"]} weaviate-client = "^3.22.1" python-multipart = "^0.0.6" deep-translator = "^1.11.4" +humanize = "^4.8.0" From 697c49a6704b67990b5311e5ec8542319454ffd0 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 23 Aug 2023 19:59:12 +0200 Subject: [PATCH 03/12] Added fix --- .../level_2_pdf_vectorstore__dlt_contracts.py | 101 ++++++++++++++---- level_2/poetry.lock | 35 ++---- level_2/pyproject.toml | 2 +- 3 files changed, 90 insertions(+), 48 deletions(-) diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 8212db028..8538ef6f5 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -13,6 +13,7 @@ import asyncio from typing import Any, Dict, List, Coroutine from deep_translator import (GoogleTranslator) from langchain.chat_models import ChatOpenAI +from langchain.output_parsers import PydanticOutputParser from langchain.schema import LLMResult, HumanMessage from langchain.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler from pydantic import BaseModel, Field, parse_obj_as @@ -512,14 +513,6 @@ class EpisodicBuffer: # self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, st_memory_id = self.st_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace) - def _compute_weights(self, context: str): - """Computes the weights for the buffer""" - pass - - def _temporal_weighting(self, context: str): - """Computes the temporal weighting for the buffer""" - pass - # async def infer_schema_from_text(self, text: str): # """Infer schema from text""" # @@ -739,7 +732,46 @@ class EpisodicBuffer: # Extract the list of tasks tasks_list = data["tasks"] + result_tasks =[] + for task in tasks_list: + class PromptWrapper(BaseModel): + observation: str = Field( + description="observation we want to fetch from vectordb" + ) + @tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True) + def convert_to_structured( observation=None, json_schema=None): + """Convert unstructured data to structured data""" + BASE_DIR = os.getcwd() + json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json") + + def load_json_or_infer_schema(file_path, document_path): + """Load JSON schema from file or infer schema from text""" + + # Attempt to load the JSON file + with open(file_path, 'r') as file: + json_schema = json.load(file) + return json_schema + + json_schema =load_json_or_infer_schema(json_path, None) + def run_open_ai_mapper(observation=None, json_schema=None): + """Convert unstructured data to structured data""" + + prompt_msgs = [ + 
SystemMessage( + content="You are a world class algorithm converting unstructured data into structured data." + ), + HumanMessage(content="Convert unstructured data to structured data:"), + HumanMessagePromptTemplate.from_template("{input}"), + HumanMessage(content="Tips: Make sure to answer in the correct format"), + ] + prompt_ = ChatPromptTemplate(messages=prompt_msgs) + chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, verbose=True) + output = chain_funct.run(input=observation, llm=self.llm) + return output + + result = run_open_ai_mapper(observation, json_schema) + return result class TranslateText(BaseModel): observation: str = Field( description="observation we want to translate" @@ -753,7 +785,7 @@ class EpisodicBuffer: agent = initialize_agent( llm=self.llm, - tools=[translate_to_en], + tools=[translate_to_en, convert_to_structured], agent=AgentType.OPENAI_FUNCTIONS, verbose=True, @@ -761,25 +793,50 @@ class EpisodicBuffer: print("HERE IS THE TASK", task) output = agent.run(input=task) print(output) - await self.encoding(output) + result_tasks.append(task) + result_tasks.append(output) + + + await self.encoding(str(result_tasks), self.namespace, params=params) buffer_result = await self._fetch_memories(observation=str(output), namespace=self.namespace) - #json here - prompt_filter = ChatPromptTemplate.from_template( - "Format and collect all outputs from the tasks presented here {tasks} and their results {results}") - chain_filter_chunk = prompt_filter | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) - output = await chain_filter_chunk.ainvoke({"query": buffer_result}) - print("HERE IS THE OUTPUT", output) + class EpisodicTask(BaseModel): + """Schema for an individual task.""" + task_order: str = Field(..., description="The order at which the task needs to be performed") + task_name: str = Field(None, description="The task that needs to be performed") + operation: str = Field(None, description="The operation to be performed") + class EpisodicList(BaseModel): + """Schema for the record containing a list of tasks.""" + tasks: List[EpisodicTask] = Field(..., description="List of tasks") + start_date: str = Field(..., description="The order at which the task needs to be performed") + end_date: str = Field(..., description="The order at which the task needs to be performed") + user_query: str = Field(..., description="The order at which the task needs to be performed") - memory = Memory(user_id=self.user_id) - await memory.async_init() + parser = PydanticOutputParser(pydantic_object=EpisodicList) - lookup_value = await memory._add_episodic_memory(observation=str(output), params={}) + prompt = PromptTemplate( + template="Format the result.\n{format_instructions}\nOriginal query is: {query}\n Steps are: {steps}, buffer is: {buffer}", + input_variables=["query", "steps", "buffer"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + + _input = prompt.format_prompt(query=user_input, steps=str(tasks_list), buffer=buffer_result) + + return "a few things to do like load episodic memory in a structured format" + + # output = self.llm(_input.to_string()) + # + # parser.parse(output) + # memory = Memory(user_id=self.user_id) + # await memory.async_init() + # + # lookup_value = await memory._add_episodic_memory(observation=str(output), params=params) + # return lookup_value #load to buffer once is done @@ -1163,8 +1220,8 @@ class Memory: params=params ) - async def _run_buffer(self, user_input: str, 
content: str = None): - return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input, content=content) + async def _run_buffer(self, user_input: str, content: str = None, params:str=None): + return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input, content=content, params=params) async def _add_buffer_memory(self, user_input: str, namespace: str = None, params: dict = None): return await self.short_term_memory.episodic_buffer._add_memories(observation=user_input, namespace=namespace, @@ -1197,7 +1254,7 @@ async def main(): "validity_end": "2024-07-31" } - gg = await memory._run_buffer(user_input="i NEED TRANSLATION TO GERMAN ", content="i NEED TRANSLATION TO GERMAN ") + gg = await memory._run_buffer(user_input="i NEED TRANSLATION TO GERMAN ", content="i NEED TRANSLATION TO GERMAN ", params=params) print(gg) # gg = await memory._delete_buffer_memory() diff --git a/level_2/poetry.lock b/level_2/poetry.lock index cdfb4ca4a..8233ad038 100644 --- a/level_2/poetry.lock +++ b/level_2/poetry.lock @@ -1362,39 +1362,38 @@ files = [ [[package]] name = "langchain" -version = "0.0.250" +version = "0.0.271" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langchain-0.0.250-py3-none-any.whl", hash = "sha256:65b3520f507e848edd88a35a70700971bbbf822fda65f621ccf44a3bb36ad03a"}, - {file = "langchain-0.0.250.tar.gz", hash = "sha256:1b5775d6a472f633bb06e794f58cb6ff5d1eeb2da603b64a6a15013f8f61ee3f"}, + {file = "langchain-0.0.271-py3-none-any.whl", hash = "sha256:3ca68c9cf04edb42ce9225adc65ee739e5e00ed55d08aeb06a47391f3c59018c"}, + {file = "langchain-0.0.271.tar.gz", hash = "sha256:f79d19405b755608216d1850de4a945a2bceb35c5ca8e4f7a4f9e29a366b097e"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} dataclasses-json = ">=0.5.7,<0.6.0" -langsmith = ">=0.0.11,<0.1.0" +langsmith = ">=0.0.21,<0.1.0" numexpr = ">=2.8.4,<3.0.0" numpy = ">=1,<2" -openapi-schema-pydantic = ">=1.2,<2.0" -pydantic = ">=1,<2" -PyYAML = ">=5.4.1" +pydantic = ">=1,<3" +PyYAML = ">=5.3" requests = ">=2,<3" SQLAlchemy = ">=1.4,<3" tenacity = ">=8.1.0,<9.0.0" [package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "anthropic (>=0.3,<0.4)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.11.0,<0.12.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j 
(>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)", "xinference (>=0.0.6,<0.0.7)"] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b6)", "openai 
(>=0,<1)"] clarifai = ["clarifai (>=9.1.0)"] cohere = ["cohere (>=4,<5)"] docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] embeddings = ["sentence-transformers (>=2,<3)"] -extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xinference (>=0.0.6,<0.0.7)", "zep-python (>=0.32)"] +extended-testing = ["amazon-textract-caller (<2)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] javascript = ["esprima (>=4.0.1,<5.0.0)"] -llms = ["anthropic (>=0.3,<0.4)", "clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.19)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)", "xinference (>=0.0.6,<0.0.7)"] +llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"] qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] text-helpers = ["chardet (>=5.1.0,<6.0.0)"] @@ -1875,20 +1874,6 @@ dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-moc embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] -[[package]] -name = "openapi-schema-pydantic" -version 
= "1.2.4" -description = "OpenAPI (v3) specification schema as pydantic class" -optional = false -python-versions = ">=3.6.1" -files = [ - {file = "openapi-schema-pydantic-1.2.4.tar.gz", hash = "sha256:3e22cf58b74a69f752cc7e5f1537f6e44164282db2700cbbcd3bb99ddd065196"}, - {file = "openapi_schema_pydantic-1.2.4-py3-none-any.whl", hash = "sha256:a932ecc5dcbb308950282088956e94dea069c9823c84e507d64f6b622222098c"}, -] - -[package.dependencies] -pydantic = ">=1.8.2" - [[package]] name = "orjson" version = "3.9.5" @@ -3795,4 +3780,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "788ca51ba313eac5f1dbcadfd7f91109b8b7a7734a1f16e10c8fb2e3b435a606" +content-hash = "5629225437c5aec01f9f862d46d6d1e68abde4c42a0c1ad709df875883171991" diff --git a/level_2/pyproject.toml b/level_2/pyproject.toml index 461255af9..5a252ba37 100644 --- a/level_2/pyproject.toml +++ b/level_2/pyproject.toml @@ -8,7 +8,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" #langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"} -langchain = "v0.0.250" +langchain = "v0.0.271" nltk = "3.8.1" openai = "0.27.8" From 2f85f6baff6a2c62d7374d458ab5816dc35bee7c Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 24 Aug 2023 10:39:12 +0200 Subject: [PATCH 04/12] Fixed translate script part --- .../level_2_pdf_vectorstore__dlt_contracts.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 8538ef6f5..189b063e6 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -655,7 +655,7 @@ class EpisodicBuffer: #we just filter the data here prompt_filter = ChatPromptTemplate.from_template( - "Filter and remove uneccessary information that is not relevant in the user query {query}") + "Filter and remove uneccessary information that is not relevant in the user query, keep it as original as possbile: {query}") chain_filter = prompt_filter | self.llm output = await chain_filter.ainvoke({"query": user_input}) @@ -699,6 +699,7 @@ class EpisodicBuffer: task_order: str = Field(..., description="The order at which the task needs to be performed") task_name: str = Field(None, description="The task that needs to be performed") operation: str = Field(None, description="The operation to be performed") + original_query: str = Field(None, description="Original user query provided") class TaskList(BaseModel): """Schema for the record containing a list of tasks.""" @@ -777,15 +778,15 @@ class EpisodicBuffer: description="observation we want to translate" ) - @tool("translate_to_en", args_schema=TranslateText, return_direct=True) - def translate_to_en(observation, args_schema=TranslateText): + @tool("translate_to_de", args_schema=TranslateText, return_direct=True) + def translate_to_de(observation, args_schema=TranslateText): """Translate to English""" - out = GoogleTranslator(source='auto', target='en').translate(text=observation) + out = GoogleTranslator(source='auto', target='de').translate(text=observation) return out agent = initialize_agent( llm=self.llm, - tools=[translate_to_en, convert_to_structured], + tools=[translate_to_de, convert_to_structured], agent=AgentType.OPENAI_FUNCTIONS, verbose=True, @@ -797,6 +798,10 @@ class EpisodicBuffer: result_tasks.append(output) + + print("HERE IS THE RESULT TASKS", 
str(result_tasks)) + + await self.encoding(str(result_tasks), self.namespace, params=params) @@ -827,6 +832,8 @@ class EpisodicBuffer: _input = prompt.format_prompt(query=user_input, steps=str(tasks_list), buffer=buffer_result) + print("a few things to do like load episodic memory in a structured format") + return "a few things to do like load episodic memory in a structured format" # output = self.llm(_input.to_string()) From b65d8016f3613934889aafe07a665a38f7f0f35a Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 24 Aug 2023 12:04:53 +0200 Subject: [PATCH 05/12] Wrap up initial buffer flow --- .../level_2_pdf_vectorstore__dlt_contracts.py | 43 +++++++++++++------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 189b063e6..1ca5946a6 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -510,6 +510,12 @@ class EpisodicBuffer: model_name="gpt-4-0613", callbacks=[MyCustomSyncHandler(), MyCustomAsyncHandler()], ) + self.llm_base = OpenAI( + temperature=0.0, + max_tokens=1200, + openai_api_key=os.environ.get('OPENAI_API_KEY'), + model_name="gpt-4-0613" + ) # self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, st_memory_id = self.st_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace) @@ -652,6 +658,10 @@ class EpisodicBuffer: list_of_operations = await self.available_operations() + memory = Memory(user_id=self.user_id) + await memory.async_init() + await memory._delete_buffer_memory() + #we just filter the data here prompt_filter = ChatPromptTemplate.from_template( @@ -691,9 +701,6 @@ class EpisodicBuffer: #do i need to do it for the episodic + raw data, might make sense - - print("HERE WE ARE") - class Task(BaseModel): """Schema for an individual task.""" task_order: str = Field(..., description="The order at which the task needs to be performed") @@ -808,12 +815,15 @@ class EpisodicBuffer: buffer_result = await self._fetch_memories(observation=str(output), namespace=self.namespace) + print("HERE IS THE RESULT TASKS", str(buffer_result)) + class EpisodicTask(BaseModel): """Schema for an individual task.""" task_order: str = Field(..., description="The order at which the task needs to be performed") task_name: str = Field(None, description="The task that needs to be performed") operation: str = Field(None, description="The operation to be performed") + operation_result: str = Field(None, description="The result of the operation") class EpisodicList(BaseModel): """Schema for the record containing a list of tasks.""" @@ -831,19 +841,24 @@ class EpisodicBuffer: ) _input = prompt.format_prompt(query=user_input, steps=str(tasks_list), buffer=buffer_result) - - print("a few things to do like load episodic memory in a structured format") - - return "a few things to do like load episodic memory in a structured format" - - # output = self.llm(_input.to_string()) # - # parser.parse(output) - # memory = Memory(user_id=self.user_id) - # await memory.async_init() + # print("a few things to do like load episodic memory in a structured format") # - # lookup_value = await memory._add_episodic_memory(observation=str(output), params=params) - # return lookup_value + # return "a few things to do like load episodic memory in a structured format" + + output = self.llm_base(_input.to_string()) + + result_parsing = parser.parse(output) + + 
print("here is the parsing result", result_parsing) + memory = Memory(user_id=self.user_id) + await memory.async_init() + # + lookup_value = await memory._add_episodic_memory(observation=str(output), params=params) + #now we clean up buffer memory + + await memory._delete_buffer_memory() + return lookup_value #load to buffer once is done From b4baa9d1fe74efe018bfacfd576cedd9f98c92b9 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 24 Aug 2023 12:41:13 +0200 Subject: [PATCH 06/12] Wrap up initial buffer flow --- .../level_2_pdf_vectorstore__dlt_contracts.py | 48 +++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 1ca5946a6..305146bcf 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -655,7 +655,8 @@ class EpisodicBuffer: """AI buffer to understand user PDF query, prioritize memory info and process it based on available operations""" - + # we get a list of available operations for our buffer to consider + # these operations are what we can do with the data, in the context of PDFs (load, translate, structure, etc) list_of_operations = await self.available_operations() memory = Memory(user_id=self.user_id) @@ -663,15 +664,15 @@ class EpisodicBuffer: await memory._delete_buffer_memory() - #we just filter the data here + #we just filter the data here to make sure input is clean prompt_filter = ChatPromptTemplate.from_template( "Filter and remove uneccessary information that is not relevant in the user query, keep it as original as possbile: {query}") chain_filter = prompt_filter | self.llm output = await chain_filter.ainvoke({"query": user_input}) - + # this part is mostly unfinished but the idea is to apply different algorithms to the data to fetch the most relevant information from the vector stores + context = [] if params: - context =[] if "freshness" in params: params.get('freshness', None) # get the value of freshness @@ -688,19 +689,50 @@ class EpisodicBuffer: else: - #defaults to semantic search + #defaults to semantic search if we don't want to apply algorithms on the vectordb data memory = Memory(user_id=self.user_id) await memory.async_init() lookup_value_episodic = await memory._fetch_episodic_memory(observation=str(output)) - lookup_value_semantic = await memory._fetch_episodic_memory(observation=str(output)) + lookup_value_semantic = await memory._fetch_semantic_memory(observation=str(output)) lookup_value_buffer = await self._fetch_memories(observation=str(output), namespace=self.namespace) - context = [lookup_value_episodic, lookup_value_semantic, lookup_value_buffer] + + context.append(lookup_value_buffer) + context.append(lookup_value_semantic) + context.append(lookup_value_episodic) + #copy the context over into the buffer #do i need to do it for the episodic + raw data, might make sense + print( "HERE IS THE CONTEXT", context) + class BufferRawContextTerms(BaseModel): + """Schema for documentGroups""" + semantic_search_term: str = Field(..., description="The search term to use to get relevant input based on user query") + document_description: str = Field(None, description="The short summary of what the document is about") + document_relevance: str = Field(None, description="The relevance of the document for the task on the scale from 1 to 5") + class BufferRawContextList(BaseModel): + """Buffer raw context processed by 
the buffer""" + docs: List[BufferRawContextTerms] = Field(..., description="List of docs") + user_query: str = Field(..., description="The original user query") + + + parser = PydanticOutputParser(pydantic_object=BufferRawContextList) + + prompt = PromptTemplate( + template="Summarize and create semantic search queries and relevant document summaries for the user query.\n{format_instructions}\nOriginal query is: {query}\n Retrieved context is: {context}", + input_variables=["query", "context"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + + _input = prompt.format_prompt(query=user_input, context=context) + + document_context_result = self.llm_base(_input.to_string()) + + document_context_result_parsed = parser.parse(document_context_result) + + print("HERE ARE THE DOCS PARSED AND STRUCTURED",document_context_result_parsed) class Task(BaseModel): """Schema for an individual task.""" task_order: str = Field(..., description="The order at which the task needs to be performed") @@ -712,7 +744,7 @@ class EpisodicBuffer: """Schema for the record containing a list of tasks.""" tasks: List[Task] = Field(..., description="List of tasks") - prompt_filter_chunk = f" Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" + prompt_filter_chunk = f"The raw context data is {str(document_context_result_parsed)} Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations}) prompt_msgs = [ From deab564373264c99a6067a4391f66b3134e6b315 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 24 Aug 2023 16:01:51 +0200 Subject: [PATCH 07/12] Add endpoint for buffer --- level_2/api.py | 20 ++ .../level_2_pdf_vectorstore__dlt_contracts.py | 288 ------------------ 2 files changed, 20 insertions(+), 288 deletions(-) diff --git a/level_2/api.py b/level_2/api.py index 57942d582..b4b52ede8 100644 --- a/level_2/api.py +++ b/level_2/api.py @@ -249,6 +249,26 @@ async def available_buffer_actions( return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) +@app.post("/run-buffer", response_model=dict) +async def available_buffer_actions( + payload: Payload, + # files: List[UploadFile] = File(...), +): + try: + + decoded_payload = payload.payload + + Memory_ = Memory(user_id=decoded_payload['user_id']) + + await Memory_.async_init() + + # memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None) + output = Memory_._run_buffer(user_input=decoded_payload['prompt'], params=decoded_payload['params']) + return JSONResponse(content={"response": output}, status_code=200) + + except Exception as e: + + return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) # # # Process each uploaded PDF file diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 305146bcf..91e727ea3 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -900,294 +900,6 @@ class EpisodicBuffer: - - - - #for files where user input is provided and they are directly proccessed - - - # - # based on the semantic search , raw 
memory data will be fetched. also, episodic memory will be fetched - # they will be written down as a "context" for the user - - - # i get scores for the episodic memory and the semantic memory - # i get only the data with the highest score - # i use that data to form the context - # file_upload - # - - - #for files where user input is provided and they are directly proccessed - - - # - # # - # - # if content is not None: - # - # # operations -> translate, structure, load to db - # - # list_of_operations = ["translate", "structure", "load to db"] - # - # prompt_filter = ChatPromptTemplate.from_template( - # "Filter and remove uneccessary information that is not relevant in the user query {query}") - # chain_filter = prompt_filter | self.llm - # output = await chain_filter.ainvoke({"query": user_input}) - # - # class Task(BaseModel): - # """Schema for an individual task.""" - # task_order: str = Field(..., description="The order at which the task needs to be performed") - # task_name: str = Field(None, description="The task that needs to be performed") - # operation: str = Field(None, description="The operation to be performed") - # - # class TaskList(BaseModel): - # """Schema for the record containing a list of tasks.""" - # tasks: List[Task] = Field(..., description="List of tasks") - # - # prompt_filter_chunk = f" Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" - # # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) - # # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations}) - # prompt_msgs = [ - # SystemMessage( - # content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones" - # ), - # HumanMessage(content="Decompose based on the following prompt:"), - # HumanMessagePromptTemplate.from_template("{input}"), - # HumanMessage(content="Tips: Make sure to answer in the correct format"), - # HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others") - # - # ] - # prompt_ = ChatPromptTemplate(messages=prompt_msgs) - # chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True) - # from langchain.callbacks import get_openai_callback - # with get_openai_callback() as cb: - # output = await chain.arun(input=prompt_filter_chunk, verbose=True) - # print(cb) - # # output = json.dumps(output) - # my_object = parse_obj_as(TaskList, output) - # print("HERE IS THE OUTPUT", my_object.json()) - # - # data = json.loads(my_object.json()) - # - # # Extract the list of tasks - # tasks_list = data["tasks"] - # - # for task in tasks_list: - # class TranslateText(BaseModel): - # observation: str = Field( - # description="observation we want to translate" - # ) - # - # @tool("translate_to_en", args_schema=TranslateText, return_direct=True) - # def translate_to_en(observation, args_schema=TranslateText): - # """Translate to English""" - # out = GoogleTranslator(source='auto', target='en').translate(text=observation) - # return out - # - # agent = initialize_agent( - # llm=self.llm, - # tools=[translate_to_en], - # agent=AgentType.OPENAI_FUNCTIONS, - # - # verbose=True, - # ) - # - # agent.run(task) - # - # # We need to encode the content. 
Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer - # # output_translated = GoogleTranslator(source='auto', target='en').translate(text=content) - # # await self.encoding(output_translated) - # # freshness_score =await self.freshness(output_translated, namespace="EPISODICBUFFER") - # # print(freshness_score) - # # shows how much the data is relevant for the user, provided by the user in a separate step, starts at 0 - # user_relevance_score = "0" - # # similarity score between the user input and the content already available in the buffer - # - # # write this to episodic memory - # - # # prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}") - # # chain_filter = prompt_filter | self.llm - # # output = await chain_filter.ainvoke({"query": user_input}) - # - # # print(output) - # - # if content is None: - # # Sensory and Linguistic Processing - # prompt_filter = ChatPromptTemplate.from_template( - # "Filter and remove uneccessary information that is not relevant in the user query {query}") - # chain_filter = prompt_filter | self.llm - # output = await chain_filter.ainvoke({"query": user_input}) - # translation = GoogleTranslator(source='auto', target='en').translate(text=output.content) - # - # def top_down_processing(): - # """Top-down processing""" - # pass - # - # def bottom_up_processing(): - # """Bottom-up processing""" - # pass - # - # def interactive_processing(): - # """interactive processing""" - # pass - # - # working_memory_activation = "bla" - # - # prompt_chunk = ChatPromptTemplate.from_template( - # "Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. Be brief") - # chain_chunk = prompt_chunk | self.llm - # output_chunks = await chain_chunk.ainvoke({"query": output.content}) - # - # print(output_chunks.content) - - # vectorstore = Weaviate.from_documents(documents, embeddings, client=client, by_text=False) - # retriever = WeaviateHybridSearchRetriever( - # client=client, - # index_name="EVENTBUFFER", - # text_key="text", - # attributes=[], - # embedding=embeddings, - # create_schema_if_missing=True, - # ) - - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace="EVENTBUFFER") - - # query = vector_db. 
- - # retriever = vectorstore.as_retriever(search_kwargs=dict(k=1)) - # memory = VectorStoreRetrieverMemory(retriever=retriever) - # class PromptWrapper(BaseModel): - # observation: str = Field( - # description="observation we want to fetch from vectordb" - # ) - # # , - # # json_schema: str = Field(description="json schema we want to infer") - # @tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True) - # def convert_to_structured( observation=None, json_schema=None): - # """Convert unstructured data to structured data""" - # BASE_DIR = os.getcwd() - # json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json") - # - # def load_json_or_infer_schema(file_path, document_path): - # """Load JSON schema from file or infer schema from text""" - # - # # Attempt to load the JSON file - # with open(file_path, 'r') as file: - # json_schema = json.load(file) - # return json_schema - # - # json_schema =load_json_or_infer_schema(json_path, None) - # def run_open_ai_mapper(observation=None, json_schema=None): - # """Convert unstructured data to structured data""" - # - # prompt_msgs = [ - # SystemMessage( - # content="You are a world class algorithm converting unstructured data into structured data." - # ), - # HumanMessage(content="Convert unstructured data to structured data:"), - # HumanMessagePromptTemplate.from_template("{input}"), - # HumanMessage(content="Tips: Make sure to answer in the correct format"), - # ] - # prompt_ = ChatPromptTemplate(messages=prompt_msgs) - # chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, verbose=True) - # output = chain_funct.run(input=observation, llm=self.llm) - # yield output - # pipeline = dlt.pipeline(pipeline_name="train_ticket", destination='duckdb', dataset_name='train_ticket_data') - # info = pipeline.run(data=run_open_ai_mapper(prompt, json_schema)) - # return print(info) - # - # - # class GoalWrapper(BaseModel): - # observation: str = Field( - # description="observation we want to fetch from vectordb" - # ) - # - # @tool("fetch_memory_wrapper", args_schema=GoalWrapper, return_direct=True) - # def fetch_memory_wrapper(observation, args_schema=GoalWrapper): - # """Fetches data from the VectorDB and returns it as a python dictionary.""" - # print("HELLO, HERE IS THE OBSERVATION: ", observation) - # - # marvin.settings.openai.api_key = os.environ.get('OPENAI_API_KEY') - # @ai_classifier - # class MemoryRoute(Enum): - # """Represents distinct routes for different memory types.""" - # - # storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY" - # raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER" - # raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY" - # long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY" - # raw_information_to_store_as_events = "EVENTBUFFER" - # - # namespace= MemoryRoute(observation) - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace=namespace.value) - # - # - # query = vector_db.fetch_memories(observation) - # - # return query - # - # class UpdatePreferences(BaseModel): - # observation: str = Field( - # description="observation we want to fetch from vectordb" - # ) - # - # @tool("add_memories_wrapper", args_schema=UpdatePreferences, return_direct=True) - # def add_memories_wrapper(observation, args_schema=UpdatePreferences): - # """Updates user preferences in the VectorDB.""" - # 
@ai_classifier - # class MemoryRoute(Enum): - # """Represents distinct routes for different memory types.""" - # - # storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY" - # raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER" - # raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY" - # long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY" - # raw_information_to_store_as_events = "EVENTBUFFER" - # - # namespace= MemoryRoute(observation) - # print("HELLO, HERE IS THE OBSERVATION 2: ") - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace=namespace.value) - # return vector_db.add_memories(observation) - # - # agent = initialize_agent( - # llm=self.llm, - # tools=[convert_to_structured,fetch_memory_wrapper, add_memories_wrapper], - # agent=AgentType.OPENAI_FUNCTIONS, - # - # verbose=True, - # ) - # - # prompt = """ - # Based on all the history and information of this user, decide based on user query query: {query} which of the following tasks needs to be done: - # 1. Memory retrieval , 2. Memory update, 3. Convert data to structured If the query is not any of these, then classify it as 'Other' - # Return the result in format: 'Result_type': 'Goal', "Original_query": "Original query" - # """ - # - # # template = Template(prompt) - # # output = template.render(query=user_input) - # # complete_query = output - # complete_query = PromptTemplate( - # input_variables=[ "query"], template=prompt - # ) - # summary_chain = LLMChain( - # llm=self.llm, prompt=complete_query, verbose=True - # ) - # from langchain.chains import SimpleSequentialChain - # - # overall_chain = SimpleSequentialChain( - # chains=[summary_chain, agent], verbose=True - # ) - # output = overall_chain.run(user_input) - # return output - - -# DEFINE STM -# DEFINE LTM - class Memory: load_dotenv() OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0)) From a746739d32de748b6f841f30614ee1045818b376 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 24 Aug 2023 17:53:53 +0200 Subject: [PATCH 08/12] added first pass of the code review --- .gitignore | 1 + .../level_2_pdf_vectorstore__dlt_contracts.py | 1420 ++++++++++++----- 2 files changed, 1042 insertions(+), 379 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..2eea525d8 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.env \ No newline at end of file diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 91e727ea3..4b6c579c5 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -36,8 +36,10 @@ from pathlib import Path from langchain import OpenAI, LLMMathChain from langchain.chat_models import ChatOpenAI from langchain.prompts import ChatPromptTemplate +import uuid +from typing import Optional + -import os from datetime import datetime import os @@ -86,60 +88,67 @@ class MyCustomAsyncHandler(AsyncCallbackHandler): print("Hi! I just woke up. 
Your llm is ending") -class VectorDB: - OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0)) - OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") +import pinecone +import weaviate - def __init__(self, user_id: str, index_name: str, memory_id: str, ltm_memory_id: str = '00000', - st_memory_id: str = '0000', buffer_id: str = '0000', db_type: str = "pinecone", namespace: str = None): +# Assuming OpenAIEmbeddings and other necessary imports are available + +# Default Values +LTM_MEMORY_ID_DEFAULT = '00000' +ST_MEMORY_ID_DEFAULT = '0000' +BUFFER_ID_DEFAULT = '0000' + + +class VectorDBFactory: + def create_vector_db(self, user_id: str, index_name: str, memory_id: str, + ltm_memory_id: str = LTM_MEMORY_ID_DEFAULT, + st_memory_id: str = ST_MEMORY_ID_DEFAULT, + buffer_id: str = BUFFER_ID_DEFAULT, + db_type: str = "pinecone", + namespace: str = None): + db_map = { + "pinecone": PineconeVectorDB, + "weaviate": WeaviateVectorDB + } + + if db_type in db_map: + return db_map[db_type](user_id, index_name, memory_id, ltm_memory_id, st_memory_id, buffer_id, namespace) + + raise ValueError(f"Unsupported database type: {db_type}") + + +class VectorDB: + OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") + def __init__(self, user_id: str, index_name: str, memory_id: str, + ltm_memory_id: str = LTM_MEMORY_ID_DEFAULT, + st_memory_id: str = ST_MEMORY_ID_DEFAULT, + buffer_id: str = BUFFER_ID_DEFAULT, namespace: str = None): self.user_id = user_id self.index_name = index_name - self.db_type = db_type self.namespace = namespace self.memory_id = memory_id self.ltm_memory_id = ltm_memory_id self.st_memory_id = st_memory_id self.buffer_id = buffer_id - # if self.db_type == "pinecone": - # self.vectorstore = self.init_pinecone(self.index_name) - if self.db_type == "weaviate": - self.init_weaviate(namespace=self.namespace) - else: - raise ValueError(f"Unsupported database type: {db_type}") - if self.db_type == "weaviate": - self.init_weaviate_client(namespace=self.namespace) - else: - raise ValueError(f"Unsupported VectorDB client type: {db_type}") - load_dotenv() + + +class PineconeVectorDB(VectorDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.init_pinecone(self.index_name) def init_pinecone(self, index_name): - load_dotenv() - PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "") - PINECONE_API_ENV = os.getenv("PINECONE_API_ENV", "") - pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV) - pinecone.Index(index_name) - vectorstore: Pinecone = Pinecone.from_existing_index( + # Pinecone initialization logic + pass - index_name=self.index_name, - embedding=OpenAIEmbeddings(), - namespace='RESULT' - ) - return vectorstore - def init_weaviate_client(self, namespace: str): - embeddings = OpenAIEmbeddings() - auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) - client = weaviate.Client( - url=os.environ.get('WEAVIATE_URL'), - auth_client_secret=auth_config, - - additional_headers={ - "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY') - } - ) - return client +class WeaviateVectorDB(VectorDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.init_weaviate(self.namespace) def init_weaviate(self, namespace: str): + # Weaviate initialization logic embeddings = OpenAIEmbeddings() auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) client = weaviate.Client( @@ -158,70 +167,53 @@ class VectorDB: embedding=embeddings, create_schema_if_missing=True, ) - return retriever + 
return retriever # If this is part of the initialization, call it here. + + def init_weaviate_client(self, namespace: str): + # Weaviate client initialization logic + auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) + client = weaviate.Client( + url=os.environ.get('WEAVIATE_URL'), + auth_client_secret=auth_config, + + additional_headers={ + "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY') + } + ) + return client async def add_memories(self, observation: str, params: dict = None): - if self.db_type == "pinecone": - # Update Pinecone memories here - vectorstore: Pinecone = Pinecone.from_existing_index( - index_name=self.index_name, embedding=OpenAIEmbeddings(), namespace=self.namespace - ) - retriever = vectorstore.as_retriever() - retriever.add_documents( - [ - Document( - page_content=observation, - metadata={ - "inserted_at": datetime.now(), - "text": observation, - "user_id": self.user_id, - "version": params.get('version', None) or "", - "agreement_id": params.get('agreement_id', None) or "", - "privacy_policy": params.get('privacy_policy', None) or "", - "terms_of_service": params.get('terms_of_service', None) or "", - "format": params.get('format', None) or "", - "schema_version": params.get('schema_version', None) or "", - "checksum": params.get('checksum', None) or "", - "owner": params.get('owner', None) or "", - "license": params.get('license', None) or "", - "validity_start": params.get('validity_start', None) or "", - "validity_end": params.get('validity_end', None) or "" - }, - namespace=self.namespace, - ) - ] - ) - elif self.db_type == "weaviate": - # Update Weaviate memories here - print(self.namespace) - retriever = self.init_weaviate(self.namespace) - return retriever.add_documents([ - Document( - metadata={ - "text": observation, - "user_id": str(self.user_id), - "memory_id": str(self.memory_id), - "ltm_memory_id": str(self.ltm_memory_id), - "st_memory_id": str(self.st_memory_id), - "buffer_id": str(self.buffer_id), - "version": params.get('version', None) or "", - "agreement_id": params.get('agreement_id', None) or "", - "privacy_policy": params.get('privacy_policy', None) or "", - "terms_of_service": params.get('terms_of_service', None) or "", - "format": params.get('format', None) or "", - "schema_version": params.get('schema_version', None) or "", - "checksum": params.get('checksum', None) or "", - "owner": params.get('owner', None) or "", - "license": params.get('license', None) or "", - "validity_start": params.get('validity_start', None) or "", - "validity_end": params.get('validity_end', None) or "" + # Update Weaviate memories here + print(self.namespace) + retriever = self.init_weaviate(self.namespace) - # **source_metadata, - }, - page_content=observation, - )] - ) + return retriever.add_documents([ + Document( + metadata={ + "text": observation, + "user_id": str(self.user_id), + "memory_id": str(self.memory_id), + "ltm_memory_id": str(self.ltm_memory_id), + "st_memory_id": str(self.st_memory_id), + "buffer_id": str(self.buffer_id), + "version": params.get('version', None) or "", + "agreement_id": params.get('agreement_id', None) or "", + "privacy_policy": params.get('privacy_policy', None) or "", + "terms_of_service": params.get('terms_of_service', None) or "", + "format": params.get('format', None) or "", + "schema_version": params.get('schema_version', None) or "", + "checksum": params.get('checksum', None) or "", + "owner": params.get('owner', None) or "", + "license": params.get('license', None) or "", + 
"validity_start": params.get('validity_start', None) or "", + "validity_end": params.get('validity_end', None) or "" + + # **source_metadata, + }, + page_content=observation, + )] + ) # def get_pinecone_vectorstore(self, namespace: str) -> pinecone.VectorStore: # return Pinecone.from_existing_index( @@ -229,78 +221,75 @@ class VectorDB: # ) async def fetch_memories(self, observation: str, namespace: str, params: dict = None): - if self.db_type == "pinecone": - # Fetch Pinecone memories here - pass - elif self.db_type == "weaviate": - # Fetch Weaviate memories here - """ - Get documents from weaviate. - Args a json containing: - query (str): The query string. - path (list): The path for filtering, e.g., ['year']. - operator (str): The operator for filtering, e.g., 'Equal'. - valueText (str): The value for filtering, e.g., '2017*'. + # Fetch Weaviate memories here + """ + Get documents from weaviate. - Example: - get_from_weaviate(query="some query", path=['year'], operator='Equal', valueText='2017*') - """ - client = self.init_weaviate_client(self.namespace) + Args a json containing: + query (str): The query string. + path (list): The path for filtering, e.g., ['year']. + operator (str): The operator for filtering, e.g., 'Equal'. + valueText (str): The value for filtering, e.g., '2017*'. - print(self.namespace) - print(str(datetime.now())) - print(observation) - if namespace is None: - namespace = self.namespace + Example: + get_from_weaviate(query="some query", path=['year'], operator='Equal', valueText='2017*') + """ + client = self.init_weaviate_client(self.namespace) - params_user_id = { - "path": ["user_id"], - "operator": "Like", - "valueText": self.user_id - } + print(self.namespace) + print(str(datetime.now())) + print(observation) + if namespace is None: + namespace = self.namespace - if params: - query_output = client.query.get(namespace, ["text" - , "user_id" - , "memory_id" - , "ltm_memory_id" - , "st_memory_id" - , "buffer_id" - , "version", - "agreement_id", - "privacy_policy", - "terms_of_service", - "format", - "schema_version", - "checksum", - "owner", - "license", - "validity_start", - "validity_end"]).with_where(params).with_additional( - ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() - return query_output - else: - query_output = client.query.get(namespace, ["text", - "user_id", - "memory_id", - "ltm_memory_id", - "st_memory_id", - "buffer_id", - "version", - "agreement_id", - "privacy_policy", - "terms_of_service", - "format", - "schema_version", - "checksum", - "owner", - "license", - "validity_start", - "validity_end" - ]).with_additional( - ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() - return query_output + params_user_id = { + "path": ["user_id"], + "operator": "Like", + "valueText": self.user_id + } + + if params: + query_output = client.query.get(namespace, ["text" + , "user_id" + , "memory_id" + , "ltm_memory_id" + , "st_memory_id" + , "buffer_id" + , "version", + "agreement_id", + "privacy_policy", + "terms_of_service", + "format", + "schema_version", + "checksum", + "owner", + "license", + "validity_start", + "validity_end"]).with_where(params).with_additional( + ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() + return query_output + else: + query_output = client.query.get(namespace, ["text", + "user_id", + "memory_id", + "ltm_memory_id", + "st_memory_id", + "buffer_id", + "version", + "agreement_id", + "privacy_policy", + 
"terms_of_service", + "format", + "schema_version", + "checksum", + "owner", + "license", + "validity_start", + "validity_end" + ]).with_additional( + ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() + return query_output async def delete_memories(self, params: dict = None): client = self.init_weaviate_client(self.namespace) @@ -359,150 +348,49 @@ class VectorDB: ) return - -class SemanticMemory: - def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate", - namespace: str = "SEMANTICMEMORY"): - # Add any semantic memory-related attributes or setup here +class BaseMemory: + def __init__(self, user_id: str, memory_id: Optional[str], index_name: Optional[str], db_type: str, namespace: str): self.user_id = user_id + self.memory_id = memory_id self.index_name = index_name self.namespace = namespace - self.semantic_memory_id = str(uuid.uuid4()) - self.memory_id = memory_id - self.ltm_memory_id = ltm_memory_id - self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id, - index_name=index_name, db_type=db_type, namespace=self.namespace) + self.memory_type_id = str(uuid.uuid4()) self.db_type = db_type + factory = VectorDBFactory() + self.vector_db = factory.create_vector_db(self.user_id, self.index_name, self.memory_id, db_type=self.db_type, + namespace=self.namespace) - async def _add_memories(self, semantic_memory: str = "None", params: dict = None) -> list[str]: - """Update semantic memory for the user""" - - if self.db_type == "weaviate": - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=400, - chunk_overlap=20, - length_function=len, - is_separator_regex=False, - ) - texts = text_splitter.create_documents([semantic_memory]) - for text in texts: - out = await self.vector_db.add_memories(observation=text.page_content, params=params) - return out - - elif self.db_type == "pinecone": - pass - - async def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) - - if self.db_type == "weaviate": - - return await self.vector_db.fetch_memories(observation, params) - - elif self.db_type == "pinecone": - pass - - async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) - - if self.db_type == "weaviate": - - return await self.vector_db.delete_memories(params=params) - - elif self.db_type == "pinecone": - pass - - -class EpisodicMemory: - def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate", - namespace: str = "EPISODICMEMORY"): - # Add any semantic memory-related attributes or setup here - self.user_id = user_id - self.index_name = index_name - self.namespace = namespace - self.episodic_memory_id = str(uuid.uuid4()) - self.memory_id = memory_id - self.ltm_memory_id = ltm_memory_id - self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id, - index_name=index_name, db_type=db_type, namespace=self.namespace) - self.db_type = db_type - - async def _add_memories(self, observation: str = None, params: dict = None) -> list[str]: - """Update semantic memory for the user""" - + async def add_memories(self, observation: Optional[str] = None, params: Optional[dict] 
= None): if self.db_type == "weaviate": return await self.vector_db.add_memories(observation=observation, params=params) + # Add other db_type conditions if necessary - elif self.db_type == "pinecone": - pass - - def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) - + async def fetch_memories(self, observation: str, params: Optional[str] = None): if self.db_type == "weaviate": + return await self.vector_db.fetch_memories(observation, params) - return self.vector_db.fetch_memories(observation, params) - - elif self.db_type == "pinecone": - pass - - async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) - + async def delete_memories(self, params: Optional[str] = None): if self.db_type == "weaviate": + return await self.vector_db.delete_memories(params) - return await self.vector_db.delete_memories(params=params) - - elif self.db_type == "pinecone": - pass + # Additional methods for specific Memory can be added here -class LongTermMemory: - def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, - db_type: str = "weaviate"): - self.user_id = user_id - self.memory_id = memory_id - self.ltm_memory_id = str(uuid.uuid4()) - self.index_name = index_name - self.namespace = namespace - self.db_type = db_type - # self.episodic_memory = EpisodicMemory() - self.semantic_memory = SemanticMemory(user_id=self.user_id, memory_id=self.memory_id, - ltm_memory_id=self.ltm_memory_id, index_name=self.index_name, - db_type=self.db_type) - self.episodic_memory = EpisodicMemory(user_id=self.user_id, memory_id=self.memory_id, - ltm_memory_id=self.ltm_memory_id, index_name=self.index_name, - db_type=self.db_type) +class SemanticMemory(BaseMemory): + def __init__(self, user_id: str, memory_id: Optional[str], index_name: Optional[str], db_type: str = "weaviate"): + super().__init__(user_id, memory_id, index_name, db_type, namespace="SEMANTICMEMORY") -class ShortTermMemory: - def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, - db_type: str = "weaviate"): - # Add any short-term memory-related attributes or setup here - self.user_id = user_id - self.memory_id = memory_id - self.namespace = namespace - self.db_type = db_type - self.stm_memory_id = str(uuid.uuid4()) - self.index_name = index_name - self.episodic_buffer = EpisodicBuffer(user_id=self.user_id, memory_id=self.memory_id, - index_name=self.index_name, db_type=self.db_type) +class EpisodicMemory(BaseMemory): + def __init__(self, user_id: str, memory_id: Optional[str], index_name: Optional[str], db_type: str = "weaviate"): + super().__init__(user_id, memory_id, index_name, db_type, namespace="EPISODICMEMORY") -class EpisodicBuffer: - def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, - namespace: str = 'EPISODICBUFFER', db_type: str = "weaviate"): - # Add any short-term memory-related attributes or setup here - self.user_id = user_id - self.memory_id = memory_id - self.namespace = namespace - self.db_type = db_type +class EpisodicBuffer(EpisodicMemory): + def __init__(self, user_id: str, memory_id: Optional[str], index_name: Optional[str], db_type: str = "weaviate"): + super().__init__(user_id, memory_id, 
index_name, db_type) + self.st_memory_id = "blah" - self.index_name = index_name self.llm = ChatOpenAI( temperature=0.0, max_tokens=1200, @@ -517,60 +405,14 @@ class EpisodicBuffer: model_name="gpt-4-0613" ) - # self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, st_memory_id = self.st_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace) - # async def infer_schema_from_text(self, text: str): - # """Infer schema from text""" - # - # prompt_ = """ You are a json schema master. Create a JSON schema based on the following data and don't write anything else: {prompt} """ - # - # complete_query = PromptTemplate( - # input_variables=["prompt"], - # template=prompt_, - # ) - # - # chain = LLMChain( - # llm=self.llm, prompt=complete_query, verbose=True - # ) - # chain_result = chain.run(prompt=text).strip() - # - # json_data = json.dumps(chain_result) - # return json_data - - async def _fetch_memories(self, observation: str, namespace: str) -> str: - vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - index_name=self.index_name, db_type=self.db_type, namespace=namespace) - - query = await vector_db.fetch_memories(observation=observation, namespace=namespace) - return query - - async def _add_memories(self, observation: str, namespace: str, params: dict = None): - vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - index_name=self.index_name, db_type=self.db_type, namespace=namespace) - - query = await vector_db.add_memories(observation, params=params) - return query - - async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: - """Fetch related characteristics, preferences or dislikes for a user.""" - # self.init_pinecone(index_name=self.index) - vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - index_name=self.index_name, db_type=self.db_type, namespace=self.namespace) - - if self.db_type == "weaviate": - - return await vector_db.delete_memories(params=params) - - elif self.db_type == "pinecone": - pass - - async def freshness(self, observation: str,namespace:str=None) -> list[str]: + async def freshness(self, observation: str, namespace: str = None) -> list[str]: """Freshness - Score between 1 and 5 on how often was the information updated in episodic or semantic memory in the past""" memory = Memory(user_id=self.user_id) await memory.async_init() - lookup_value = await memory._fetch_episodic_memory(observation = observation) + lookup_value = await memory._fetch_episodic_memory(observation=observation) unix_t = lookup_value["data"]["Get"]["EPISODICMEMORY"][0]["_additional"]["lastUpdateTimeUnix"] # Convert Unix timestamp to datetime @@ -578,6 +420,7 @@ class EpisodicBuffer: time_difference = datetime.now() - last_update_datetime time_difference_text = humanize.naturaltime(time_difference) marvin.settings.openai.api_key = os.environ.get('OPENAI_API_KEY') + @ai_classifier class MemoryRoute(Enum): """Represents classifer for freshness of memories""" @@ -592,8 +435,7 @@ class EpisodicBuffer: namespace = MemoryRoute(str(time_difference_text)) return [namespace.value, lookup_value] - - async def frequency(self, observation: str,namespace:str) -> list[str]: + async def frequency(self, observation: str, namespace: str) -> list[str]: """Frequency - Score between 1 and 5 on how often was the information processed in episodic memory in the past Counts the number of times a memory was 
accessed in the past and divides it by the total number of memories in the episodic memory """ client = self.init_weaviate_client(self.namespace) @@ -603,12 +445,10 @@ class EpisodicBuffer: result_output = await memory._fetch_episodic_memory(observation=observation) number_of_relevant_events = len(result_output["data"]["Get"]["EPISODICMEMORY"]) - number_of_total_events = client.query.aggregate( self.namespace).with_meta_count().do() + number_of_total_events = client.query.aggregate(self.namespace).with_meta_count().do() frequency = float(number_of_relevant_events) / float(number_of_total_events) return [str(frequency), result_output["data"]["Get"]["EPISODICMEMORY"][0]] - - async def relevance(self, observation: str) -> list[str]: """Relevance - Score between 1 and 5 on how often was the final information relevant to the user in the past. Stored in the episodic memory, mainly to show how well a buffer did the job @@ -637,7 +477,7 @@ class EpisodicBuffer: # return ggur - async def encoding(self, document: str, namespace: str = "EPISODICBUFFER", params:dict=None) -> list[str]: + async def encoding(self, document: str, namespace: str = "EPISODICBUFFER", params: dict = None) -> list[str]: """Encoding for the buffer, stores raw data in the buffer Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer""" vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, @@ -649,7 +489,8 @@ class EpisodicBuffer: async def available_operations(self) -> list[str]: """Determines what operations are available for the user to process PDFs""" - return ["translate", "structure", "load to database", "load to semantic memory", "load to episodic memory", "load to buffer"] + return ["translate", "structure", "load to database", "load to semantic memory", "load to episodic memory", + "load to buffer"] async def main_buffer(self, user_input=None, content=None, params=None): @@ -663,8 +504,7 @@ class EpisodicBuffer: await memory.async_init() await memory._delete_buffer_memory() - - #we just filter the data here to make sure input is clean + # we just filter the data here to make sure input is clean prompt_filter = ChatPromptTemplate.from_template( "Filter and remove uneccessary information that is not relevant in the user query, keep it as original as possbile: {query}") chain_filter = prompt_filter | self.llm @@ -675,7 +515,7 @@ class EpisodicBuffer: if params: if "freshness" in params: - params.get('freshness', None) # get the value of freshness + params.get('freshness', None) # get the value of freshness freshness = await self.freshness(observation=str(output)) context.append(freshness) @@ -685,11 +525,11 @@ class EpisodicBuffer: print("freshness", frequency) context.append(frequency) - #fix this so it actually filters + # fix this so it actually filters else: - #defaults to semantic search if we don't want to apply algorithms on the vectordb data + # defaults to semantic search if we don't want to apply algorithms on the vectordb data memory = Memory(user_id=self.user_id) await memory.async_init() @@ -697,27 +537,27 @@ class EpisodicBuffer: lookup_value_semantic = await memory._fetch_semantic_memory(observation=str(output)) lookup_value_buffer = await self._fetch_memories(observation=str(output), namespace=self.namespace) - context.append(lookup_value_buffer) context.append(lookup_value_semantic) context.append(lookup_value_episodic) - #copy the context over into the buffer - #do i need to do it for the episodic + raw 
data, might make sense - print( "HERE IS THE CONTEXT", context) + # copy the context over into the buffer + # do i need to do it for the episodic + raw data, might make sense + print("HERE IS THE CONTEXT", context) + class BufferRawContextTerms(BaseModel): """Schema for documentGroups""" - semantic_search_term: str = Field(..., description="The search term to use to get relevant input based on user query") + semantic_search_term: str = Field(..., + description="The search term to use to get relevant input based on user query") document_description: str = Field(None, description="The short summary of what the document is about") - document_relevance: str = Field(None, description="The relevance of the document for the task on the scale from 1 to 5") - + document_relevance: str = Field(None, + description="The relevance of the document for the task on the scale from 1 to 5") class BufferRawContextList(BaseModel): """Buffer raw context processed by the buffer""" docs: List[BufferRawContextTerms] = Field(..., description="List of docs") user_query: str = Field(..., description="The original user query") - parser = PydanticOutputParser(pydantic_object=BufferRawContextList) prompt = PromptTemplate( @@ -726,13 +566,14 @@ class EpisodicBuffer: partial_variables={"format_instructions": parser.get_format_instructions()}, ) - _input = prompt.format_prompt(query=user_input, context=context) + _input = prompt.format_prompt(query=user_input, context=context) document_context_result = self.llm_base(_input.to_string()) document_context_result_parsed = parser.parse(document_context_result) - print("HERE ARE THE DOCS PARSED AND STRUCTURED",document_context_result_parsed) + print("HERE ARE THE DOCS PARSED AND STRUCTURED", document_context_result_parsed) + class Task(BaseModel): """Schema for an individual task.""" task_order: str = Field(..., description="The order at which the task needs to be performed") @@ -772,15 +613,16 @@ class EpisodicBuffer: # Extract the list of tasks tasks_list = data["tasks"] - result_tasks =[] + result_tasks = [] for task in tasks_list: class PromptWrapper(BaseModel): observation: str = Field( description="observation we want to fetch from vectordb" ) + @tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True) - def convert_to_structured( observation=None, json_schema=None): + def convert_to_structured(observation=None, json_schema=None): """Convert unstructured data to structured data""" BASE_DIR = os.getcwd() json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json") @@ -793,7 +635,8 @@ class EpisodicBuffer: json_schema = json.load(file) return json_schema - json_schema =load_json_or_infer_schema(json_path, None) + json_schema = load_json_or_infer_schema(json_path, None) + def run_open_ai_mapper(observation=None, json_schema=None): """Convert unstructured data to structured data""" @@ -806,12 +649,14 @@ class EpisodicBuffer: HumanMessage(content="Tips: Make sure to answer in the correct format"), ] prompt_ = ChatPromptTemplate(messages=prompt_msgs) - chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, verbose=True) + chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, + verbose=True) output = chain_funct.run(input=observation, llm=self.llm) return output result = run_open_ai_mapper(observation, json_schema) return result + class TranslateText(BaseModel): observation: str = Field( description="observation we want to translate" @@ -836,20 +681,14 @@ class EpisodicBuffer: 
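            # collect both the task definition and the agent's output so the pair can be
            # encoded into the episodic buffer once the loop over tasks_list finishes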
result_tasks.append(task) result_tasks.append(output) - - print("HERE IS THE RESULT TASKS", str(result_tasks)) - await self.encoding(str(result_tasks), self.namespace, params=params) - - buffer_result = await self._fetch_memories(observation=str(output), namespace=self.namespace) print("HERE IS THE RESULT TASKS", str(buffer_result)) - class EpisodicTask(BaseModel): """Schema for an individual task.""" task_order: str = Field(..., description="The order at which the task needs to be performed") @@ -887,15 +726,36 @@ class EpisodicBuffer: await memory.async_init() # lookup_value = await memory._add_episodic_memory(observation=str(output), params=params) - #now we clean up buffer memory + # now we clean up buffer memory await memory._delete_buffer_memory() return lookup_value - #load to buffer once is done +class LongTermMemory: + def __init__(self, user_id: str = "676", memory_id: Optional[str] = None, index_name: Optional[str] = None, + db_type: str = "weaviate"): + self.user_id = user_id + self.memory_id = memory_id + self.ltm_memory_id = str(uuid.uuid4()) + self.index_name = index_name + self.db_type = db_type + self.semantic_memory = SemanticMemory(user_id, memory_id, index_name, db_type) + self.episodic_memory = EpisodicMemory(user_id, memory_id, index_name, db_type) + + +class ShortTermMemory: + def __init__(self, user_id: str = "676", memory_id: Optional[str] = None, index_name: Optional[str] = None, + db_type: str = "weaviate"): + self.user_id = user_id + self.memory_id = memory_id + self.stm_memory_id = str(uuid.uuid4()) + self.index_name = index_name + self.db_type = db_type + self.episodic_buffer = EpisodicBuffer(user_id, memory_id, index_name, db_type) + + - #fetch everything in the current session and load to episodic memory @@ -919,18 +779,18 @@ class Memory: load_dotenv() # Asynchronous factory function for creating LongTermMemory - async def async_create_long_term_memory(self, user_id, memory_id, index_name, namespace, db_type): + async def async_create_long_term_memory(self, user_id, memory_id, index_name, db_type): # Perform asynchronous initialization steps if needed return LongTermMemory( user_id=user_id, memory_id=memory_id, index_name=index_name, - namespace=namespace, db_type=db_type + db_type=db_type ) async def async_init(self): # Asynchronous initialization of LongTermMemory and ShortTermMemory self.long_term_memory = await self.async_create_long_term_memory( user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, - namespace=self.namespace, db_type=self.db_type + db_type=self.db_type ) async def async_create_short_term_memory(self, user_id, memory_id, index_name, db_type): @@ -1003,6 +863,808 @@ class Memory: async def _available_operations(self): return await self.long_term_memory.episodic_buffer._available_operations() + + +# Usage +# factory = VectorDBFactory() +# # Example usage with placeholders; you'd replace these with actual values. 
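+#
+# A minimal async sketch going through the Memory facade rather than the factory
+# directly (assumes a reachable Weaviate instance plus the WEAVIATE_URL,
+# WEAVIATE_API_KEY and OPENAI_API_KEY environment variables; "123" is a
+# placeholder user id and "example note" a placeholder observation):
+#
+#   import asyncio
+#
+#   async def demo():
+#       memory = Memory(user_id="123")
+#       await memory.async_init()
+#       await memory._add_episodic_memory(observation="example note", params=None)
+#       print(await memory._fetch_episodic_memory(observation="example note"))
+#
+#   asyncio.run(demo())
+#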
+# user_id = "some_user_id" +# index_name = "some_index_name" +# memory_id = "some_memory_id" +# db_type = "pinecone" +# namespace = "some_namespace" +# +# vector_db = factory.create_vector_db(user_id, index_name, memory_id, db_type=db_type, namespace=namespace) + + +# class VectorDB: +# OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0)) +# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") +# +# def __init__(self, user_id: str, index_name: str, memory_id: str, ltm_memory_id: str = '00000', +# st_memory_id: str = '0000', buffer_id: str = '0000', db_type: str = "pinecone", namespace: str = None): +# self.user_id = user_id +# self.index_name = index_name +# self.db_type = db_type +# self.namespace = namespace +# self.memory_id = memory_id +# self.ltm_memory_id = ltm_memory_id +# self.st_memory_id = st_memory_id +# self.buffer_id = buffer_id +# # if self.db_type == "pinecone": +# # self.vectorstore = self.init_pinecone(self.index_name) +# if self.db_type == "weaviate": +# self.init_weaviate(namespace=self.namespace) +# else: +# raise ValueError(f"Unsupported database type: {db_type}") +# if self.db_type == "weaviate": +# self.init_weaviate_client(namespace=self.namespace) +# else: +# raise ValueError(f"Unsupported VectorDB client type: {db_type}") +# load_dotenv() +# +# def init_pinecone(self, index_name): +# load_dotenv() +# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "") +# PINECONE_API_ENV = os.getenv("PINECONE_API_ENV", "") +# pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV) +# pinecone.Index(index_name) +# vectorstore: Pinecone = Pinecone.from_existing_index( +# +# index_name=self.index_name, +# embedding=OpenAIEmbeddings(), +# namespace='RESULT' +# ) +# return vectorstore +# +# def init_weaviate_client(self, namespace: str): +# embeddings = OpenAIEmbeddings() +# auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) +# client = weaviate.Client( +# url=os.environ.get('WEAVIATE_URL'), +# auth_client_secret=auth_config, +# +# additional_headers={ +# "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY') +# } +# ) +# return client +# +# def init_weaviate(self, namespace: str): +# embeddings = OpenAIEmbeddings() +# auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) +# client = weaviate.Client( +# url=os.environ.get('WEAVIATE_URL'), +# auth_client_secret=auth_config, +# +# additional_headers={ +# "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY') +# } +# ) +# retriever = WeaviateHybridSearchRetriever( +# client=client, +# index_name=namespace, +# text_key="text", +# attributes=[], +# embedding=embeddings, +# create_schema_if_missing=True, +# ) +# return retriever +# +# async def add_memories(self, observation: str, params: dict = None): +# if self.db_type == "pinecone": +# # Update Pinecone memories here +# vectorstore: Pinecone = Pinecone.from_existing_index( +# index_name=self.index_name, embedding=OpenAIEmbeddings(), namespace=self.namespace +# ) +# retriever = vectorstore.as_retriever() +# retriever.add_documents( +# [ +# Document( +# page_content=observation, +# metadata={ +# "inserted_at": datetime.now(), +# "text": observation, +# "user_id": self.user_id, +# "version": params.get('version', None) or "", +# "agreement_id": params.get('agreement_id', None) or "", +# "privacy_policy": params.get('privacy_policy', None) or "", +# "terms_of_service": params.get('terms_of_service', None) or "", +# "format": params.get('format', None) or "", +# "schema_version": params.get('schema_version', None) 
or "", +# "checksum": params.get('checksum', None) or "", +# "owner": params.get('owner', None) or "", +# "license": params.get('license', None) or "", +# "validity_start": params.get('validity_start', None) or "", +# "validity_end": params.get('validity_end', None) or "" +# }, +# namespace=self.namespace, +# ) +# ] +# ) +# elif self.db_type == "weaviate": +# # Update Weaviate memories here +# print(self.namespace) +# retriever = self.init_weaviate(self.namespace) +# +# return retriever.add_documents([ +# Document( +# metadata={ +# "text": observation, +# "user_id": str(self.user_id), +# "memory_id": str(self.memory_id), +# "ltm_memory_id": str(self.ltm_memory_id), +# "st_memory_id": str(self.st_memory_id), +# "buffer_id": str(self.buffer_id), +# "version": params.get('version', None) or "", +# "agreement_id": params.get('agreement_id', None) or "", +# "privacy_policy": params.get('privacy_policy', None) or "", +# "terms_of_service": params.get('terms_of_service', None) or "", +# "format": params.get('format', None) or "", +# "schema_version": params.get('schema_version', None) or "", +# "checksum": params.get('checksum', None) or "", +# "owner": params.get('owner', None) or "", +# "license": params.get('license', None) or "", +# "validity_start": params.get('validity_start', None) or "", +# "validity_end": params.get('validity_end', None) or "" +# +# # **source_metadata, +# }, +# page_content=observation, +# )] +# ) +# +# # def get_pinecone_vectorstore(self, namespace: str) -> pinecone.VectorStore: +# # return Pinecone.from_existing_index( +# # index_name=self.index, embedding=OpenAIEmbeddings(), namespace=namespace +# # ) +# +# async def fetch_memories(self, observation: str, namespace: str, params: dict = None): +# if self.db_type == "pinecone": +# # Fetch Pinecone memories here +# pass +# elif self.db_type == "weaviate": +# # Fetch Weaviate memories here +# """ +# Get documents from weaviate. +# +# Args a json containing: +# query (str): The query string. +# path (list): The path for filtering, e.g., ['year']. +# operator (str): The operator for filtering, e.g., 'Equal'. +# valueText (str): The value for filtering, e.g., '2017*'. 
+# +# Example: +# get_from_weaviate(query="some query", path=['year'], operator='Equal', valueText='2017*') +# """ +# client = self.init_weaviate_client(self.namespace) +# +# print(self.namespace) +# print(str(datetime.now())) +# print(observation) +# if namespace is None: +# namespace = self.namespace +# +# params_user_id = { +# "path": ["user_id"], +# "operator": "Like", +# "valueText": self.user_id +# } +# +# if params: +# query_output = client.query.get(namespace, ["text" +# , "user_id" +# , "memory_id" +# , "ltm_memory_id" +# , "st_memory_id" +# , "buffer_id" +# , "version", +# "agreement_id", +# "privacy_policy", +# "terms_of_service", +# "format", +# "schema_version", +# "checksum", +# "owner", +# "license", +# "validity_start", +# "validity_end"]).with_where(params).with_additional( +# ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() +# return query_output +# else: +# query_output = client.query.get(namespace, ["text", +# "user_id", +# "memory_id", +# "ltm_memory_id", +# "st_memory_id", +# "buffer_id", +# "version", +# "agreement_id", +# "privacy_policy", +# "terms_of_service", +# "format", +# "schema_version", +# "checksum", +# "owner", +# "license", +# "validity_start", +# "validity_end" +# ]).with_additional( +# ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() +# return query_output +# +# async def delete_memories(self, params: dict = None): +# client = self.init_weaviate_client(self.namespace) +# if params: +# where_filter = { +# "path": ["id"], +# "operator": "Equal", +# "valueText": params.get('id', None) +# } +# return client.batch.delete_objects( +# class_name=self.namespace, +# # Same `where` filter as in the GraphQL API +# where=where_filter, +# ) +# else: +# # Delete all objects +# +# return client.batch.delete_objects( +# class_name=self.namespace, +# where={ +# 'path': ['user_id'], +# 'operator': 'Equal', +# 'valueText': self.user_id +# } +# ) +# +# def update_memories(self, observation, namespace: str, params: dict = None): +# client = self.init_weaviate_client(self.namespace) +# +# client.data_object.update( +# data_object={ +# "text": observation, +# "user_id": str(self.user_id), +# "memory_id": str(self.memory_id), +# "ltm_memory_id": str(self.ltm_memory_id), +# "st_memory_id": str(self.st_memory_id), +# "buffer_id": str(self.buffer_id), +# "version": params.get('version', None) or "", +# "agreement_id": params.get('agreement_id', None) or "", +# "privacy_policy": params.get('privacy_policy', None) or "", +# "terms_of_service": params.get('terms_of_service', None) or "", +# "format": params.get('format', None) or "", +# "schema_version": params.get('schema_version', None) or "", +# "checksum": params.get('checksum', None) or "", +# "owner": params.get('owner', None) or "", +# "license": params.get('license', None) or "", +# "validity_start": params.get('validity_start', None) or "", +# "validity_end": params.get('validity_end', None) or "" +# +# # **source_metadata, +# +# }, +# class_name="Test", +# uuid=params.get('id', None), +# consistency_level=weaviate.data.replication.ConsistencyLevel.ALL, # default QUORUM +# ) +# return +# class SemanticMemory: +# def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate", +# namespace: str = "SEMANTICMEMORY"): +# # Add any semantic memory-related attributes or setup here +# self.user_id = user_id +# self.index_name = index_name +# self.namespace = namespace +# self.semantic_memory_id = 
str(uuid.uuid4()) +# self.memory_id = memory_id +# self.ltm_memory_id = ltm_memory_id +# self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id, +# index_name=index_name, db_type=db_type, namespace=self.namespace) +# self.db_type = db_type +# +# async def _add_memories(self, semantic_memory: str = "None", params: dict = None) -> list[str]: +# """Update semantic memory for the user""" +# +# if self.db_type == "weaviate": +# text_splitter = RecursiveCharacterTextSplitter( +# chunk_size=400, +# chunk_overlap=20, +# length_function=len, +# is_separator_regex=False, +# ) +# texts = text_splitter.create_documents([semantic_memory]) +# for text in texts: +# out = await self.vector_db.add_memories(observation=text.page_content, params=params) +# return out +# +# elif self.db_type == "pinecone": +# pass +# +# async def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]: +# """Fetch related characteristics, preferences or dislikes for a user.""" +# # self.init_pinecone(index_name=self.index) +# +# if self.db_type == "weaviate": +# +# return await self.vector_db.fetch_memories(observation, params) +# +# elif self.db_type == "pinecone": +# pass +# +# async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: +# """Fetch related characteristics, preferences or dislikes for a user.""" +# # self.init_pinecone(index_name=self.index) +# +# if self.db_type == "weaviate": +# +# return await self.vector_db.delete_memories(params=params) +# +# elif self.db_type == "pinecone": +# pass +# +# +# class EpisodicMemory: +# def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate", +# namespace: str = "EPISODICMEMORY"): +# # Add any semantic memory-related attributes or setup here +# self.user_id = user_id +# self.index_name = index_name +# self.namespace = namespace +# self.episodic_memory_id = str(uuid.uuid4()) +# self.memory_id = memory_id +# self.ltm_memory_id = ltm_memory_id +# self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id, +# index_name=index_name, db_type=db_type, namespace=self.namespace) +# self.db_type = db_type +# +# async def _add_memories(self, observation: str = None, params: dict = None) -> list[str]: +# """Update semantic memory for the user""" +# +# if self.db_type == "weaviate": +# return await self.vector_db.add_memories(observation=observation, params=params) +# +# elif self.db_type == "pinecone": +# pass +# +# def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]: +# """Fetch related characteristics, preferences or dislikes for a user.""" +# # self.init_pinecone(index_name=self.index) +# +# if self.db_type == "weaviate": +# +# return self.vector_db.fetch_memories(observation, params) +# +# elif self.db_type == "pinecone": +# pass +# +# async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: +# """Fetch related characteristics, preferences or dislikes for a user.""" +# # self.init_pinecone(index_name=self.index) +# +# if self.db_type == "weaviate": +# +# return await self.vector_db.delete_memories(params=params) +# +# elif self.db_type == "pinecone": +# pass +# +# +# class LongTermMemory: +# def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, +# db_type: str = "weaviate"): +# self.user_id = user_id +# self.memory_id = memory_id +# self.ltm_memory_id = 
str(uuid.uuid4()) +# self.index_name = index_name +# self.namespace = namespace +# self.db_type = db_type +# # self.episodic_memory = EpisodicMemory() +# self.semantic_memory = SemanticMemory(user_id=self.user_id, memory_id=self.memory_id, +# ltm_memory_id=self.ltm_memory_id, index_name=self.index_name, +# db_type=self.db_type) +# self.episodic_memory = EpisodicMemory(user_id=self.user_id, memory_id=self.memory_id, +# ltm_memory_id=self.ltm_memory_id, index_name=self.index_name, +# db_type=self.db_type) +# +# +# class ShortTermMemory: +# def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, +# db_type: str = "weaviate"): +# # Add any short-term memory-related attributes or setup here +# self.user_id = user_id +# self.memory_id = memory_id +# self.namespace = namespace +# self.db_type = db_type +# self.stm_memory_id = str(uuid.uuid4()) +# self.index_name = index_name +# self.episodic_buffer = EpisodicBuffer(user_id=self.user_id, memory_id=self.memory_id, +# index_name=self.index_name, db_type=self.db_type) + + +class EpisodicBuffer: + def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, + namespace: str = 'EPISODICBUFFER', db_type: str = "weaviate"): + # Add any short-term memory-related attributes or setup here + self.user_id = user_id + self.memory_id = memory_id + self.namespace = namespace + self.db_type = db_type + self.st_memory_id = "blah" + self.index_name = index_name + self.llm = ChatOpenAI( + temperature=0.0, + max_tokens=1200, + openai_api_key=os.environ.get('OPENAI_API_KEY'), + model_name="gpt-4-0613", + callbacks=[MyCustomSyncHandler(), MyCustomAsyncHandler()], + ) + self.llm_base = OpenAI( + temperature=0.0, + max_tokens=1200, + openai_api_key=os.environ.get('OPENAI_API_KEY'), + model_name="gpt-4-0613" + ) + + # async def _fetch_memories(self, observation: str, namespace: str) -> str: + # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, + # index_name=self.index_name, db_type=self.db_type, namespace=namespace) + # + # query = await vector_db.fetch_memories(observation=observation, namespace=namespace) + # return query + # + # async def _add_memories(self, observation: str, namespace: str, params: dict = None): + # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, + # index_name=self.index_name, db_type=self.db_type, namespace=namespace) + # + # query = await vector_db.add_memories(observation, params=params) + # return query + # + # async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: + # """Fetch related characteristics, preferences or dislikes for a user.""" + # # self.init_pinecone(index_name=self.index) + # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, + # index_name=self.index_name, db_type=self.db_type, namespace=self.namespace) + # + # if self.db_type == "weaviate": + # + # return await vector_db.delete_memories(params=params) + # + # elif self.db_type == "pinecone": + # pass + # + # async def freshness(self, observation: str,namespace:str=None) -> list[str]: + # """Freshness - Score between 1 and 5 on how often was the information updated in episodic or semantic memory in the past""" + # + # memory = Memory(user_id=self.user_id) + # await memory.async_init() + # + # lookup_value = await memory._fetch_episodic_memory(observation = observation) + # unix_t = 
lookup_value["data"]["Get"]["EPISODICMEMORY"][0]["_additional"]["lastUpdateTimeUnix"] + # + # # Convert Unix timestamp to datetime + # last_update_datetime = datetime.fromtimestamp(int(unix_t) / 1000) + # time_difference = datetime.now() - last_update_datetime + # time_difference_text = humanize.naturaltime(time_difference) + # marvin.settings.openai.api_key = os.environ.get('OPENAI_API_KEY') + # @ai_classifier + # class MemoryRoute(Enum): + # """Represents classifer for freshness of memories""" + # + # data_uploaded_now = "0" + # data_uploaded_very_recently = "1" + # data_uploaded_recently = "2" + # data_uploaded_more_than_a_month_ago = "3" + # data_uploaded_more_than_three_months_ago = "4" + # data_uploaded_more_than_six_months_ago = "5" + # + # namespace = MemoryRoute(str(time_difference_text)) + # return [namespace.value, lookup_value] + # + # + # async def frequency(self, observation: str,namespace:str) -> list[str]: + # """Frequency - Score between 1 and 5 on how often was the information processed in episodic memory in the past + # Counts the number of times a memory was accessed in the past and divides it by the total number of memories in the episodic memory """ + # client = self.init_weaviate_client(self.namespace) + # + # memory = Memory(user_id=self.user_id) + # await memory.async_init() + # + # result_output = await memory._fetch_episodic_memory(observation=observation) + # number_of_relevant_events = len(result_output["data"]["Get"]["EPISODICMEMORY"]) + # number_of_total_events = client.query.aggregate( self.namespace).with_meta_count().do() + # frequency = float(number_of_relevant_events) / float(number_of_total_events) + # return [str(frequency), result_output["data"]["Get"]["EPISODICMEMORY"][0]] + # + # + # + # async def relevance(self, observation: str) -> list[str]: + # """Relevance - Score between 1 and 5 on how often was the final information relevant to the user in the past. + # Stored in the episodic memory, mainly to show how well a buffer did the job + # Starts at 1, gets updated based on the user feedback """ + # + # return ["5", "memory"] + # + # async def saliency(self, observation: str) -> list[str]: + # """Determines saliency by finding relevance between user input and document schema values. 
+ # After finding document schena value relevant for the user, it forms a new query based on the schema value and the user input """ + # + # return ["5", "memory"] + # + # # @ai_classifier + # # class MemoryRoute(Enum): + # # """Represents classifer for freshness of memories""" + # # + # # data_uploaded_now = "0" + # # data_uploaded_very_recently = "1" + # # data_uploaded_recently = "2" + # # data_uploaded_more_than_a_month_ago = "3" + # # data_uploaded_more_than_three_months_ago = "4" + # # data_uploaded_more_than_six_months_ago = "5" + # # + # # namespace= MemoryRoute(observation) + # + # # return ggur + # + # async def encoding(self, document: str, namespace: str = "EPISODICBUFFER", params:dict=None) -> list[str]: + # """Encoding for the buffer, stores raw data in the buffer + # Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer""" + # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, + # index_name=self.index_name, db_type=self.db_type, namespace=namespace) + # + # query = await vector_db.add_memories(document, params=params) + # return query + # + # async def available_operations(self) -> list[str]: + # """Determines what operations are available for the user to process PDFs""" + # + # return ["translate", "structure", "load to database", "load to semantic memory", "load to episodic memory", "load to buffer"] + # + # async def main_buffer(self, user_input=None, content=None, params=None): + # + # """AI buffer to understand user PDF query, prioritize memory info and process it based on available operations""" + # + # # we get a list of available operations for our buffer to consider + # # these operations are what we can do with the data, in the context of PDFs (load, translate, structure, etc) + # list_of_operations = await self.available_operations() + # + # memory = Memory(user_id=self.user_id) + # await memory.async_init() + # await memory._delete_buffer_memory() + # + # + # #we just filter the data here to make sure input is clean + # prompt_filter = ChatPromptTemplate.from_template( + # "Filter and remove uneccessary information that is not relevant in the user query, keep it as original as possbile: {query}") + # chain_filter = prompt_filter | self.llm + # output = await chain_filter.ainvoke({"query": user_input}) + # + # # this part is mostly unfinished but the idea is to apply different algorithms to the data to fetch the most relevant information from the vector stores + # context = [] + # if params: + # + # if "freshness" in params: + # params.get('freshness', None) # get the value of freshness + # freshness = await self.freshness(observation=str(output)) + # context.append(freshness) + # + # elif "frequency" in params: + # params.get('freshness', None) + # frequency = await self.freshness(observation=str(output)) + # print("freshness", frequency) + # context.append(frequency) + # + # #fix this so it actually filters + # + # + # else: + # #defaults to semantic search if we don't want to apply algorithms on the vectordb data + # memory = Memory(user_id=self.user_id) + # await memory.async_init() + # + # lookup_value_episodic = await memory._fetch_episodic_memory(observation=str(output)) + # lookup_value_semantic = await memory._fetch_semantic_memory(observation=str(output)) + # lookup_value_buffer = await self._fetch_memories(observation=str(output), namespace=self.namespace) + # + # + # context.append(lookup_value_buffer) + # context.append(lookup_value_semantic) + # 
context.append(lookup_value_episodic) + # + # #copy the context over into the buffer + # #do i need to do it for the episodic + raw data, might make sense + # print( "HERE IS THE CONTEXT", context) + # class BufferRawContextTerms(BaseModel): + # """Schema for documentGroups""" + # semantic_search_term: str = Field(..., description="The search term to use to get relevant input based on user query") + # document_description: str = Field(None, description="The short summary of what the document is about") + # document_relevance: str = Field(None, description="The relevance of the document for the task on the scale from 1 to 5") + # + # + # class BufferRawContextList(BaseModel): + # """Buffer raw context processed by the buffer""" + # docs: List[BufferRawContextTerms] = Field(..., description="List of docs") + # user_query: str = Field(..., description="The original user query") + # + # + # parser = PydanticOutputParser(pydantic_object=BufferRawContextList) + # + # prompt = PromptTemplate( + # template="Summarize and create semantic search queries and relevant document summaries for the user query.\n{format_instructions}\nOriginal query is: {query}\n Retrieved context is: {context}", + # input_variables=["query", "context"], + # partial_variables={"format_instructions": parser.get_format_instructions()}, + # ) + # + # _input = prompt.format_prompt(query=user_input, context=context) + # + # document_context_result = self.llm_base(_input.to_string()) + # + # document_context_result_parsed = parser.parse(document_context_result) + # + # print("HERE ARE THE DOCS PARSED AND STRUCTURED",document_context_result_parsed) + # class Task(BaseModel): + # """Schema for an individual task.""" + # task_order: str = Field(..., description="The order at which the task needs to be performed") + # task_name: str = Field(None, description="The task that needs to be performed") + # operation: str = Field(None, description="The operation to be performed") + # original_query: str = Field(None, description="Original user query provided") + # + # class TaskList(BaseModel): + # """Schema for the record containing a list of tasks.""" + # tasks: List[Task] = Field(..., description="List of tasks") + # + # prompt_filter_chunk = f"The raw context data is {str(document_context_result_parsed)} Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" + # # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) + # # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations}) + # prompt_msgs = [ + # SystemMessage( + # content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones" + # ), + # HumanMessage(content="Decompose based on the following prompt:"), + # HumanMessagePromptTemplate.from_template("{input}"), + # HumanMessage(content="Tips: Make sure to answer in the correct format"), + # HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others") + # + # ] + # prompt_ = ChatPromptTemplate(messages=prompt_msgs) + # chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True) + # from langchain.callbacks import get_openai_callback + # with get_openai_callback() as cb: + # output = await chain.arun(input=prompt_filter_chunk, verbose=True) + # print(cb) + # # output = json.dumps(output) + # my_object = parse_obj_as(TaskList, 
output) + # print("HERE IS THE OUTPUT", my_object.json()) + # + # data = json.loads(my_object.json()) + # + # # Extract the list of tasks + # tasks_list = data["tasks"] + # + # result_tasks =[] + # + # for task in tasks_list: + # class PromptWrapper(BaseModel): + # observation: str = Field( + # description="observation we want to fetch from vectordb" + # ) + # @tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True) + # def convert_to_structured( observation=None, json_schema=None): + # """Convert unstructured data to structured data""" + # BASE_DIR = os.getcwd() + # json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json") + # + # def load_json_or_infer_schema(file_path, document_path): + # """Load JSON schema from file or infer schema from text""" + # + # # Attempt to load the JSON file + # with open(file_path, 'r') as file: + # json_schema = json.load(file) + # return json_schema + # + # json_schema =load_json_or_infer_schema(json_path, None) + # def run_open_ai_mapper(observation=None, json_schema=None): + # """Convert unstructured data to structured data""" + # + # prompt_msgs = [ + # SystemMessage( + # content="You are a world class algorithm converting unstructured data into structured data." + # ), + # HumanMessage(content="Convert unstructured data to structured data:"), + # HumanMessagePromptTemplate.from_template("{input}"), + # HumanMessage(content="Tips: Make sure to answer in the correct format"), + # ] + # prompt_ = ChatPromptTemplate(messages=prompt_msgs) + # chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, verbose=True) + # output = chain_funct.run(input=observation, llm=self.llm) + # return output + # + # result = run_open_ai_mapper(observation, json_schema) + # return result + # class TranslateText(BaseModel): + # observation: str = Field( + # description="observation we want to translate" + # ) + # + # @tool("translate_to_de", args_schema=TranslateText, return_direct=True) + # def translate_to_de(observation, args_schema=TranslateText): + # """Translate to English""" + # out = GoogleTranslator(source='auto', target='de').translate(text=observation) + # return out + # + # agent = initialize_agent( + # llm=self.llm, + # tools=[translate_to_de, convert_to_structured], + # agent=AgentType.OPENAI_FUNCTIONS, + # + # verbose=True, + # ) + # print("HERE IS THE TASK", task) + # output = agent.run(input=task) + # print(output) + # result_tasks.append(task) + # result_tasks.append(output) + # + # + # + # print("HERE IS THE RESULT TASKS", str(result_tasks)) + # + # + # await self.encoding(str(result_tasks), self.namespace, params=params) + # + # + # + # buffer_result = await self._fetch_memories(observation=str(output), namespace=self.namespace) + # + # print("HERE IS THE RESULT TASKS", str(buffer_result)) + # + # + # class EpisodicTask(BaseModel): + # """Schema for an individual task.""" + # task_order: str = Field(..., description="The order at which the task needs to be performed") + # task_name: str = Field(None, description="The task that needs to be performed") + # operation: str = Field(None, description="The operation to be performed") + # operation_result: str = Field(None, description="The result of the operation") + # + # class EpisodicList(BaseModel): + # """Schema for the record containing a list of tasks.""" + # tasks: List[EpisodicTask] = Field(..., description="List of tasks") + # start_date: str = Field(..., description="The order at which the task needs to be performed") + # end_date: 
str = Field(..., description="The order at which the task needs to be performed") + # user_query: str = Field(..., description="The order at which the task needs to be performed") + # + # parser = PydanticOutputParser(pydantic_object=EpisodicList) + # + # prompt = PromptTemplate( + # template="Format the result.\n{format_instructions}\nOriginal query is: {query}\n Steps are: {steps}, buffer is: {buffer}", + # input_variables=["query", "steps", "buffer"], + # partial_variables={"format_instructions": parser.get_format_instructions()}, + # ) + # + # _input = prompt.format_prompt(query=user_input, steps=str(tasks_list), buffer=buffer_result) + # # + # # print("a few things to do like load episodic memory in a structured format") + # # + # # return "a few things to do like load episodic memory in a structured format" + # + # output = self.llm_base(_input.to_string()) + # + # result_parsing = parser.parse(output) + # + # print("here is the parsing result", result_parsing) + # memory = Memory(user_id=self.user_id) + # await memory.async_init() + # # + # lookup_value = await memory._add_episodic_memory(observation=str(output), params=params) + # #now we clean up buffer memory + # + # await memory._delete_buffer_memory() + # return lookup_value + + # load to buffer once is done + + # fetch everything in the current session and load to episodic memory + async def main(): memory = Memory(user_id="123") await memory.async_init() From 59b1c54cb837ad9b039b6441b0c96933bc420b8e Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 24 Aug 2023 17:55:26 +0200 Subject: [PATCH 09/12] removed extra code --- .../level_2_pdf_vectorstore__dlt_contracts.py | 801 +----------------- 1 file changed, 1 insertion(+), 800 deletions(-) diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 4b6c579c5..a4582380c 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -803,7 +803,7 @@ class Memory: # Asynchronous initialization of LongTermMemory and ShortTermMemory self.long_term_memory = await self.async_create_long_term_memory( user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, - namespace=self.namespace, db_type=self.db_type + db_type=self.db_type ) self.short_term_memory = await self.async_create_short_term_memory( user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, @@ -865,805 +865,6 @@ class Memory: -# Usage -# factory = VectorDBFactory() -# # Example usage with placeholders; you'd replace these with actual values. 
-# user_id = "some_user_id" -# index_name = "some_index_name" -# memory_id = "some_memory_id" -# db_type = "pinecone" -# namespace = "some_namespace" -# -# vector_db = factory.create_vector_db(user_id, index_name, memory_id, db_type=db_type, namespace=namespace) - - -# class VectorDB: -# OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0)) -# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") -# -# def __init__(self, user_id: str, index_name: str, memory_id: str, ltm_memory_id: str = '00000', -# st_memory_id: str = '0000', buffer_id: str = '0000', db_type: str = "pinecone", namespace: str = None): -# self.user_id = user_id -# self.index_name = index_name -# self.db_type = db_type -# self.namespace = namespace -# self.memory_id = memory_id -# self.ltm_memory_id = ltm_memory_id -# self.st_memory_id = st_memory_id -# self.buffer_id = buffer_id -# # if self.db_type == "pinecone": -# # self.vectorstore = self.init_pinecone(self.index_name) -# if self.db_type == "weaviate": -# self.init_weaviate(namespace=self.namespace) -# else: -# raise ValueError(f"Unsupported database type: {db_type}") -# if self.db_type == "weaviate": -# self.init_weaviate_client(namespace=self.namespace) -# else: -# raise ValueError(f"Unsupported VectorDB client type: {db_type}") -# load_dotenv() -# -# def init_pinecone(self, index_name): -# load_dotenv() -# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "") -# PINECONE_API_ENV = os.getenv("PINECONE_API_ENV", "") -# pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV) -# pinecone.Index(index_name) -# vectorstore: Pinecone = Pinecone.from_existing_index( -# -# index_name=self.index_name, -# embedding=OpenAIEmbeddings(), -# namespace='RESULT' -# ) -# return vectorstore -# -# def init_weaviate_client(self, namespace: str): -# embeddings = OpenAIEmbeddings() -# auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) -# client = weaviate.Client( -# url=os.environ.get('WEAVIATE_URL'), -# auth_client_secret=auth_config, -# -# additional_headers={ -# "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY') -# } -# ) -# return client -# -# def init_weaviate(self, namespace: str): -# embeddings = OpenAIEmbeddings() -# auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY')) -# client = weaviate.Client( -# url=os.environ.get('WEAVIATE_URL'), -# auth_client_secret=auth_config, -# -# additional_headers={ -# "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY') -# } -# ) -# retriever = WeaviateHybridSearchRetriever( -# client=client, -# index_name=namespace, -# text_key="text", -# attributes=[], -# embedding=embeddings, -# create_schema_if_missing=True, -# ) -# return retriever -# -# async def add_memories(self, observation: str, params: dict = None): -# if self.db_type == "pinecone": -# # Update Pinecone memories here -# vectorstore: Pinecone = Pinecone.from_existing_index( -# index_name=self.index_name, embedding=OpenAIEmbeddings(), namespace=self.namespace -# ) -# retriever = vectorstore.as_retriever() -# retriever.add_documents( -# [ -# Document( -# page_content=observation, -# metadata={ -# "inserted_at": datetime.now(), -# "text": observation, -# "user_id": self.user_id, -# "version": params.get('version', None) or "", -# "agreement_id": params.get('agreement_id', None) or "", -# "privacy_policy": params.get('privacy_policy', None) or "", -# "terms_of_service": params.get('terms_of_service', None) or "", -# "format": params.get('format', None) or "", -# "schema_version": params.get('schema_version', None) 
or "", -# "checksum": params.get('checksum', None) or "", -# "owner": params.get('owner', None) or "", -# "license": params.get('license', None) or "", -# "validity_start": params.get('validity_start', None) or "", -# "validity_end": params.get('validity_end', None) or "" -# }, -# namespace=self.namespace, -# ) -# ] -# ) -# elif self.db_type == "weaviate": -# # Update Weaviate memories here -# print(self.namespace) -# retriever = self.init_weaviate(self.namespace) -# -# return retriever.add_documents([ -# Document( -# metadata={ -# "text": observation, -# "user_id": str(self.user_id), -# "memory_id": str(self.memory_id), -# "ltm_memory_id": str(self.ltm_memory_id), -# "st_memory_id": str(self.st_memory_id), -# "buffer_id": str(self.buffer_id), -# "version": params.get('version', None) or "", -# "agreement_id": params.get('agreement_id', None) or "", -# "privacy_policy": params.get('privacy_policy', None) or "", -# "terms_of_service": params.get('terms_of_service', None) or "", -# "format": params.get('format', None) or "", -# "schema_version": params.get('schema_version', None) or "", -# "checksum": params.get('checksum', None) or "", -# "owner": params.get('owner', None) or "", -# "license": params.get('license', None) or "", -# "validity_start": params.get('validity_start', None) or "", -# "validity_end": params.get('validity_end', None) or "" -# -# # **source_metadata, -# }, -# page_content=observation, -# )] -# ) -# -# # def get_pinecone_vectorstore(self, namespace: str) -> pinecone.VectorStore: -# # return Pinecone.from_existing_index( -# # index_name=self.index, embedding=OpenAIEmbeddings(), namespace=namespace -# # ) -# -# async def fetch_memories(self, observation: str, namespace: str, params: dict = None): -# if self.db_type == "pinecone": -# # Fetch Pinecone memories here -# pass -# elif self.db_type == "weaviate": -# # Fetch Weaviate memories here -# """ -# Get documents from weaviate. -# -# Args a json containing: -# query (str): The query string. -# path (list): The path for filtering, e.g., ['year']. -# operator (str): The operator for filtering, e.g., 'Equal'. -# valueText (str): The value for filtering, e.g., '2017*'. 
-# -# Example: -# get_from_weaviate(query="some query", path=['year'], operator='Equal', valueText='2017*') -# """ -# client = self.init_weaviate_client(self.namespace) -# -# print(self.namespace) -# print(str(datetime.now())) -# print(observation) -# if namespace is None: -# namespace = self.namespace -# -# params_user_id = { -# "path": ["user_id"], -# "operator": "Like", -# "valueText": self.user_id -# } -# -# if params: -# query_output = client.query.get(namespace, ["text" -# , "user_id" -# , "memory_id" -# , "ltm_memory_id" -# , "st_memory_id" -# , "buffer_id" -# , "version", -# "agreement_id", -# "privacy_policy", -# "terms_of_service", -# "format", -# "schema_version", -# "checksum", -# "owner", -# "license", -# "validity_start", -# "validity_end"]).with_where(params).with_additional( -# ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() -# return query_output -# else: -# query_output = client.query.get(namespace, ["text", -# "user_id", -# "memory_id", -# "ltm_memory_id", -# "st_memory_id", -# "buffer_id", -# "version", -# "agreement_id", -# "privacy_policy", -# "terms_of_service", -# "format", -# "schema_version", -# "checksum", -# "owner", -# "license", -# "validity_start", -# "validity_end" -# ]).with_additional( -# ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do() -# return query_output -# -# async def delete_memories(self, params: dict = None): -# client = self.init_weaviate_client(self.namespace) -# if params: -# where_filter = { -# "path": ["id"], -# "operator": "Equal", -# "valueText": params.get('id', None) -# } -# return client.batch.delete_objects( -# class_name=self.namespace, -# # Same `where` filter as in the GraphQL API -# where=where_filter, -# ) -# else: -# # Delete all objects -# -# return client.batch.delete_objects( -# class_name=self.namespace, -# where={ -# 'path': ['user_id'], -# 'operator': 'Equal', -# 'valueText': self.user_id -# } -# ) -# -# def update_memories(self, observation, namespace: str, params: dict = None): -# client = self.init_weaviate_client(self.namespace) -# -# client.data_object.update( -# data_object={ -# "text": observation, -# "user_id": str(self.user_id), -# "memory_id": str(self.memory_id), -# "ltm_memory_id": str(self.ltm_memory_id), -# "st_memory_id": str(self.st_memory_id), -# "buffer_id": str(self.buffer_id), -# "version": params.get('version', None) or "", -# "agreement_id": params.get('agreement_id', None) or "", -# "privacy_policy": params.get('privacy_policy', None) or "", -# "terms_of_service": params.get('terms_of_service', None) or "", -# "format": params.get('format', None) or "", -# "schema_version": params.get('schema_version', None) or "", -# "checksum": params.get('checksum', None) or "", -# "owner": params.get('owner', None) or "", -# "license": params.get('license', None) or "", -# "validity_start": params.get('validity_start', None) or "", -# "validity_end": params.get('validity_end', None) or "" -# -# # **source_metadata, -# -# }, -# class_name="Test", -# uuid=params.get('id', None), -# consistency_level=weaviate.data.replication.ConsistencyLevel.ALL, # default QUORUM -# ) -# return -# class SemanticMemory: -# def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate", -# namespace: str = "SEMANTICMEMORY"): -# # Add any semantic memory-related attributes or setup here -# self.user_id = user_id -# self.index_name = index_name -# self.namespace = namespace -# self.semantic_memory_id = 
str(uuid.uuid4()) -# self.memory_id = memory_id -# self.ltm_memory_id = ltm_memory_id -# self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id, -# index_name=index_name, db_type=db_type, namespace=self.namespace) -# self.db_type = db_type -# -# async def _add_memories(self, semantic_memory: str = "None", params: dict = None) -> list[str]: -# """Update semantic memory for the user""" -# -# if self.db_type == "weaviate": -# text_splitter = RecursiveCharacterTextSplitter( -# chunk_size=400, -# chunk_overlap=20, -# length_function=len, -# is_separator_regex=False, -# ) -# texts = text_splitter.create_documents([semantic_memory]) -# for text in texts: -# out = await self.vector_db.add_memories(observation=text.page_content, params=params) -# return out -# -# elif self.db_type == "pinecone": -# pass -# -# async def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]: -# """Fetch related characteristics, preferences or dislikes for a user.""" -# # self.init_pinecone(index_name=self.index) -# -# if self.db_type == "weaviate": -# -# return await self.vector_db.fetch_memories(observation, params) -# -# elif self.db_type == "pinecone": -# pass -# -# async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: -# """Fetch related characteristics, preferences or dislikes for a user.""" -# # self.init_pinecone(index_name=self.index) -# -# if self.db_type == "weaviate": -# -# return await self.vector_db.delete_memories(params=params) -# -# elif self.db_type == "pinecone": -# pass -# -# -# class EpisodicMemory: -# def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate", -# namespace: str = "EPISODICMEMORY"): -# # Add any semantic memory-related attributes or setup here -# self.user_id = user_id -# self.index_name = index_name -# self.namespace = namespace -# self.episodic_memory_id = str(uuid.uuid4()) -# self.memory_id = memory_id -# self.ltm_memory_id = ltm_memory_id -# self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id, -# index_name=index_name, db_type=db_type, namespace=self.namespace) -# self.db_type = db_type -# -# async def _add_memories(self, observation: str = None, params: dict = None) -> list[str]: -# """Update semantic memory for the user""" -# -# if self.db_type == "weaviate": -# return await self.vector_db.add_memories(observation=observation, params=params) -# -# elif self.db_type == "pinecone": -# pass -# -# def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]: -# """Fetch related characteristics, preferences or dislikes for a user.""" -# # self.init_pinecone(index_name=self.index) -# -# if self.db_type == "weaviate": -# -# return self.vector_db.fetch_memories(observation, params) -# -# elif self.db_type == "pinecone": -# pass -# -# async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: -# """Fetch related characteristics, preferences or dislikes for a user.""" -# # self.init_pinecone(index_name=self.index) -# -# if self.db_type == "weaviate": -# -# return await self.vector_db.delete_memories(params=params) -# -# elif self.db_type == "pinecone": -# pass -# -# -# class LongTermMemory: -# def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, -# db_type: str = "weaviate"): -# self.user_id = user_id -# self.memory_id = memory_id -# self.ltm_memory_id = 
str(uuid.uuid4()) -# self.index_name = index_name -# self.namespace = namespace -# self.db_type = db_type -# # self.episodic_memory = EpisodicMemory() -# self.semantic_memory = SemanticMemory(user_id=self.user_id, memory_id=self.memory_id, -# ltm_memory_id=self.ltm_memory_id, index_name=self.index_name, -# db_type=self.db_type) -# self.episodic_memory = EpisodicMemory(user_id=self.user_id, memory_id=self.memory_id, -# ltm_memory_id=self.ltm_memory_id, index_name=self.index_name, -# db_type=self.db_type) -# -# -# class ShortTermMemory: -# def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None, -# db_type: str = "weaviate"): -# # Add any short-term memory-related attributes or setup here -# self.user_id = user_id -# self.memory_id = memory_id -# self.namespace = namespace -# self.db_type = db_type -# self.stm_memory_id = str(uuid.uuid4()) -# self.index_name = index_name -# self.episodic_buffer = EpisodicBuffer(user_id=self.user_id, memory_id=self.memory_id, -# index_name=self.index_name, db_type=self.db_type) - - -class EpisodicBuffer: - def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, - namespace: str = 'EPISODICBUFFER', db_type: str = "weaviate"): - # Add any short-term memory-related attributes or setup here - self.user_id = user_id - self.memory_id = memory_id - self.namespace = namespace - self.db_type = db_type - self.st_memory_id = "blah" - self.index_name = index_name - self.llm = ChatOpenAI( - temperature=0.0, - max_tokens=1200, - openai_api_key=os.environ.get('OPENAI_API_KEY'), - model_name="gpt-4-0613", - callbacks=[MyCustomSyncHandler(), MyCustomAsyncHandler()], - ) - self.llm_base = OpenAI( - temperature=0.0, - max_tokens=1200, - openai_api_key=os.environ.get('OPENAI_API_KEY'), - model_name="gpt-4-0613" - ) - - # async def _fetch_memories(self, observation: str, namespace: str) -> str: - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace=namespace) - # - # query = await vector_db.fetch_memories(observation=observation, namespace=namespace) - # return query - # - # async def _add_memories(self, observation: str, namespace: str, params: dict = None): - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace=namespace) - # - # query = await vector_db.add_memories(observation, params=params) - # return query - # - # async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]: - # """Fetch related characteristics, preferences or dislikes for a user.""" - # # self.init_pinecone(index_name=self.index) - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace=self.namespace) - # - # if self.db_type == "weaviate": - # - # return await vector_db.delete_memories(params=params) - # - # elif self.db_type == "pinecone": - # pass - # - # async def freshness(self, observation: str,namespace:str=None) -> list[str]: - # """Freshness - Score between 1 and 5 on how often was the information updated in episodic or semantic memory in the past""" - # - # memory = Memory(user_id=self.user_id) - # await memory.async_init() - # - # lookup_value = await memory._fetch_episodic_memory(observation = observation) - # unix_t = 
lookup_value["data"]["Get"]["EPISODICMEMORY"][0]["_additional"]["lastUpdateTimeUnix"] - # - # # Convert Unix timestamp to datetime - # last_update_datetime = datetime.fromtimestamp(int(unix_t) / 1000) - # time_difference = datetime.now() - last_update_datetime - # time_difference_text = humanize.naturaltime(time_difference) - # marvin.settings.openai.api_key = os.environ.get('OPENAI_API_KEY') - # @ai_classifier - # class MemoryRoute(Enum): - # """Represents classifer for freshness of memories""" - # - # data_uploaded_now = "0" - # data_uploaded_very_recently = "1" - # data_uploaded_recently = "2" - # data_uploaded_more_than_a_month_ago = "3" - # data_uploaded_more_than_three_months_ago = "4" - # data_uploaded_more_than_six_months_ago = "5" - # - # namespace = MemoryRoute(str(time_difference_text)) - # return [namespace.value, lookup_value] - # - # - # async def frequency(self, observation: str,namespace:str) -> list[str]: - # """Frequency - Score between 1 and 5 on how often was the information processed in episodic memory in the past - # Counts the number of times a memory was accessed in the past and divides it by the total number of memories in the episodic memory """ - # client = self.init_weaviate_client(self.namespace) - # - # memory = Memory(user_id=self.user_id) - # await memory.async_init() - # - # result_output = await memory._fetch_episodic_memory(observation=observation) - # number_of_relevant_events = len(result_output["data"]["Get"]["EPISODICMEMORY"]) - # number_of_total_events = client.query.aggregate( self.namespace).with_meta_count().do() - # frequency = float(number_of_relevant_events) / float(number_of_total_events) - # return [str(frequency), result_output["data"]["Get"]["EPISODICMEMORY"][0]] - # - # - # - # async def relevance(self, observation: str) -> list[str]: - # """Relevance - Score between 1 and 5 on how often was the final information relevant to the user in the past. - # Stored in the episodic memory, mainly to show how well a buffer did the job - # Starts at 1, gets updated based on the user feedback """ - # - # return ["5", "memory"] - # - # async def saliency(self, observation: str) -> list[str]: - # """Determines saliency by finding relevance between user input and document schema values. 
- # After finding document schena value relevant for the user, it forms a new query based on the schema value and the user input """ - # - # return ["5", "memory"] - # - # # @ai_classifier - # # class MemoryRoute(Enum): - # # """Represents classifer for freshness of memories""" - # # - # # data_uploaded_now = "0" - # # data_uploaded_very_recently = "1" - # # data_uploaded_recently = "2" - # # data_uploaded_more_than_a_month_ago = "3" - # # data_uploaded_more_than_three_months_ago = "4" - # # data_uploaded_more_than_six_months_ago = "5" - # # - # # namespace= MemoryRoute(observation) - # - # # return ggur - # - # async def encoding(self, document: str, namespace: str = "EPISODICBUFFER", params:dict=None) -> list[str]: - # """Encoding for the buffer, stores raw data in the buffer - # Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer""" - # vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - # index_name=self.index_name, db_type=self.db_type, namespace=namespace) - # - # query = await vector_db.add_memories(document, params=params) - # return query - # - # async def available_operations(self) -> list[str]: - # """Determines what operations are available for the user to process PDFs""" - # - # return ["translate", "structure", "load to database", "load to semantic memory", "load to episodic memory", "load to buffer"] - # - # async def main_buffer(self, user_input=None, content=None, params=None): - # - # """AI buffer to understand user PDF query, prioritize memory info and process it based on available operations""" - # - # # we get a list of available operations for our buffer to consider - # # these operations are what we can do with the data, in the context of PDFs (load, translate, structure, etc) - # list_of_operations = await self.available_operations() - # - # memory = Memory(user_id=self.user_id) - # await memory.async_init() - # await memory._delete_buffer_memory() - # - # - # #we just filter the data here to make sure input is clean - # prompt_filter = ChatPromptTemplate.from_template( - # "Filter and remove uneccessary information that is not relevant in the user query, keep it as original as possbile: {query}") - # chain_filter = prompt_filter | self.llm - # output = await chain_filter.ainvoke({"query": user_input}) - # - # # this part is mostly unfinished but the idea is to apply different algorithms to the data to fetch the most relevant information from the vector stores - # context = [] - # if params: - # - # if "freshness" in params: - # params.get('freshness', None) # get the value of freshness - # freshness = await self.freshness(observation=str(output)) - # context.append(freshness) - # - # elif "frequency" in params: - # params.get('freshness', None) - # frequency = await self.freshness(observation=str(output)) - # print("freshness", frequency) - # context.append(frequency) - # - # #fix this so it actually filters - # - # - # else: - # #defaults to semantic search if we don't want to apply algorithms on the vectordb data - # memory = Memory(user_id=self.user_id) - # await memory.async_init() - # - # lookup_value_episodic = await memory._fetch_episodic_memory(observation=str(output)) - # lookup_value_semantic = await memory._fetch_semantic_memory(observation=str(output)) - # lookup_value_buffer = await self._fetch_memories(observation=str(output), namespace=self.namespace) - # - # - # context.append(lookup_value_buffer) - # context.append(lookup_value_semantic) - # 
context.append(lookup_value_episodic) - # - # #copy the context over into the buffer - # #do i need to do it for the episodic + raw data, might make sense - # print( "HERE IS THE CONTEXT", context) - # class BufferRawContextTerms(BaseModel): - # """Schema for documentGroups""" - # semantic_search_term: str = Field(..., description="The search term to use to get relevant input based on user query") - # document_description: str = Field(None, description="The short summary of what the document is about") - # document_relevance: str = Field(None, description="The relevance of the document for the task on the scale from 1 to 5") - # - # - # class BufferRawContextList(BaseModel): - # """Buffer raw context processed by the buffer""" - # docs: List[BufferRawContextTerms] = Field(..., description="List of docs") - # user_query: str = Field(..., description="The original user query") - # - # - # parser = PydanticOutputParser(pydantic_object=BufferRawContextList) - # - # prompt = PromptTemplate( - # template="Summarize and create semantic search queries and relevant document summaries for the user query.\n{format_instructions}\nOriginal query is: {query}\n Retrieved context is: {context}", - # input_variables=["query", "context"], - # partial_variables={"format_instructions": parser.get_format_instructions()}, - # ) - # - # _input = prompt.format_prompt(query=user_input, context=context) - # - # document_context_result = self.llm_base(_input.to_string()) - # - # document_context_result_parsed = parser.parse(document_context_result) - # - # print("HERE ARE THE DOCS PARSED AND STRUCTURED",document_context_result_parsed) - # class Task(BaseModel): - # """Schema for an individual task.""" - # task_order: str = Field(..., description="The order at which the task needs to be performed") - # task_name: str = Field(None, description="The task that needs to be performed") - # operation: str = Field(None, description="The operation to be performed") - # original_query: str = Field(None, description="Original user query provided") - # - # class TaskList(BaseModel): - # """Schema for the record containing a list of tasks.""" - # tasks: List[Task] = Field(..., description="List of tasks") - # - # prompt_filter_chunk = f"The raw context data is {str(document_context_result_parsed)} Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}" - # # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList) - # # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations}) - # prompt_msgs = [ - # SystemMessage( - # content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones" - # ), - # HumanMessage(content="Decompose based on the following prompt:"), - # HumanMessagePromptTemplate.from_template("{input}"), - # HumanMessage(content="Tips: Make sure to answer in the correct format"), - # HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others") - # - # ] - # prompt_ = ChatPromptTemplate(messages=prompt_msgs) - # chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True) - # from langchain.callbacks import get_openai_callback - # with get_openai_callback() as cb: - # output = await chain.arun(input=prompt_filter_chunk, verbose=True) - # print(cb) - # # output = json.dumps(output) - # my_object = parse_obj_as(TaskList, 
output) - # print("HERE IS THE OUTPUT", my_object.json()) - # - # data = json.loads(my_object.json()) - # - # # Extract the list of tasks - # tasks_list = data["tasks"] - # - # result_tasks =[] - # - # for task in tasks_list: - # class PromptWrapper(BaseModel): - # observation: str = Field( - # description="observation we want to fetch from vectordb" - # ) - # @tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True) - # def convert_to_structured( observation=None, json_schema=None): - # """Convert unstructured data to structured data""" - # BASE_DIR = os.getcwd() - # json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json") - # - # def load_json_or_infer_schema(file_path, document_path): - # """Load JSON schema from file or infer schema from text""" - # - # # Attempt to load the JSON file - # with open(file_path, 'r') as file: - # json_schema = json.load(file) - # return json_schema - # - # json_schema =load_json_or_infer_schema(json_path, None) - # def run_open_ai_mapper(observation=None, json_schema=None): - # """Convert unstructured data to structured data""" - # - # prompt_msgs = [ - # SystemMessage( - # content="You are a world class algorithm converting unstructured data into structured data." - # ), - # HumanMessage(content="Convert unstructured data to structured data:"), - # HumanMessagePromptTemplate.from_template("{input}"), - # HumanMessage(content="Tips: Make sure to answer in the correct format"), - # ] - # prompt_ = ChatPromptTemplate(messages=prompt_msgs) - # chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, verbose=True) - # output = chain_funct.run(input=observation, llm=self.llm) - # return output - # - # result = run_open_ai_mapper(observation, json_schema) - # return result - # class TranslateText(BaseModel): - # observation: str = Field( - # description="observation we want to translate" - # ) - # - # @tool("translate_to_de", args_schema=TranslateText, return_direct=True) - # def translate_to_de(observation, args_schema=TranslateText): - # """Translate to English""" - # out = GoogleTranslator(source='auto', target='de').translate(text=observation) - # return out - # - # agent = initialize_agent( - # llm=self.llm, - # tools=[translate_to_de, convert_to_structured], - # agent=AgentType.OPENAI_FUNCTIONS, - # - # verbose=True, - # ) - # print("HERE IS THE TASK", task) - # output = agent.run(input=task) - # print(output) - # result_tasks.append(task) - # result_tasks.append(output) - # - # - # - # print("HERE IS THE RESULT TASKS", str(result_tasks)) - # - # - # await self.encoding(str(result_tasks), self.namespace, params=params) - # - # - # - # buffer_result = await self._fetch_memories(observation=str(output), namespace=self.namespace) - # - # print("HERE IS THE RESULT TASKS", str(buffer_result)) - # - # - # class EpisodicTask(BaseModel): - # """Schema for an individual task.""" - # task_order: str = Field(..., description="The order at which the task needs to be performed") - # task_name: str = Field(None, description="The task that needs to be performed") - # operation: str = Field(None, description="The operation to be performed") - # operation_result: str = Field(None, description="The result of the operation") - # - # class EpisodicList(BaseModel): - # """Schema for the record containing a list of tasks.""" - # tasks: List[EpisodicTask] = Field(..., description="List of tasks") - # start_date: str = Field(..., description="The order at which the task needs to be performed") - # end_date: 
str = Field(..., description="The order at which the task needs to be performed") - # user_query: str = Field(..., description="The order at which the task needs to be performed") - # - # parser = PydanticOutputParser(pydantic_object=EpisodicList) - # - # prompt = PromptTemplate( - # template="Format the result.\n{format_instructions}\nOriginal query is: {query}\n Steps are: {steps}, buffer is: {buffer}", - # input_variables=["query", "steps", "buffer"], - # partial_variables={"format_instructions": parser.get_format_instructions()}, - # ) - # - # _input = prompt.format_prompt(query=user_input, steps=str(tasks_list), buffer=buffer_result) - # # - # # print("a few things to do like load episodic memory in a structured format") - # # - # # return "a few things to do like load episodic memory in a structured format" - # - # output = self.llm_base(_input.to_string()) - # - # result_parsing = parser.parse(output) - # - # print("here is the parsing result", result_parsing) - # memory = Memory(user_id=self.user_id) - # await memory.async_init() - # # - # lookup_value = await memory._add_episodic_memory(observation=str(output), params=params) - # #now we clean up buffer memory - # - # await memory._delete_buffer_memory() - # return lookup_value - - # load to buffer once is done - - # fetch everything in the current session and load to episodic memory async def main(): memory = Memory(user_id="123") From 32d3bd026ad2f24885bd6367fd7ba1cf7690de23 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Fri, 25 Aug 2023 11:09:13 +0200 Subject: [PATCH 10/12] fix the demo logic --- .../level_2_pdf_vectorstore__dlt_contracts.py | 79 +++++++++---------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index a4582380c..3e30d53ba 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -59,6 +59,8 @@ from langchain.vectorstores import Weaviate import weaviate import uuid import humanize +import pinecone +import weaviate load_dotenv() OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") @@ -88,8 +90,7 @@ class MyCustomAsyncHandler(AsyncCallbackHandler): print("Hi! I just woke up. 
Your llm is ending") -import pinecone -import weaviate + # Assuming OpenAIEmbeddings and other necessary imports are available @@ -306,7 +307,7 @@ class WeaviateVectorDB(VectorDB): ) else: # Delete all objects - + print("HERE IS THE USER ID", self.user_id) return client.batch.delete_objects( class_name=self.namespace, where={ @@ -386,9 +387,9 @@ class EpisodicMemory(BaseMemory): super().__init__(user_id, memory_id, index_name, db_type, namespace="EPISODICMEMORY") -class EpisodicBuffer(EpisodicMemory): +class EpisodicBuffer(BaseMemory): def __init__(self, user_id: str, memory_id: Optional[str], index_name: Optional[str], db_type: str = "weaviate"): - super().__init__(user_id, memory_id, index_name, db_type) + super().__init__(user_id, memory_id, index_name, db_type, namespace="BUFFERMEMORY") self.st_memory_id = "blah" self.llm = ChatOpenAI( @@ -480,10 +481,7 @@ class EpisodicBuffer(EpisodicMemory): async def encoding(self, document: str, namespace: str = "EPISODICBUFFER", params: dict = None) -> list[str]: """Encoding for the buffer, stores raw data in the buffer Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer""" - vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id, - index_name=self.index_name, db_type=self.db_type, namespace=namespace) - - query = await vector_db.add_memories(document, params=params) + query = await self.add_memories(document, params=params) return query async def available_operations(self) -> list[str]: @@ -502,7 +500,13 @@ class EpisodicBuffer(EpisodicMemory): memory = Memory(user_id=self.user_id) await memory.async_init() - await memory._delete_buffer_memory() + + try: + # we delete all memories in the episodic buffer, so we can start fresh + await self.delete_memories() + except: + # in case there are no memories, we pass + pass # we just filter the data here to make sure input is clean prompt_filter = ChatPromptTemplate.from_template( @@ -535,7 +539,7 @@ class EpisodicBuffer(EpisodicMemory): lookup_value_episodic = await memory._fetch_episodic_memory(observation=str(output)) lookup_value_semantic = await memory._fetch_semantic_memory(observation=str(output)) - lookup_value_buffer = await self._fetch_memories(observation=str(output), namespace=self.namespace) + lookup_value_buffer = await self.fetch_memories(observation=str(output)) context.append(lookup_value_buffer) context.append(lookup_value_semantic) @@ -685,7 +689,7 @@ class EpisodicBuffer(EpisodicMemory): await self.encoding(str(result_tasks), self.namespace, params=params) - buffer_result = await self._fetch_memories(observation=str(output), namespace=self.namespace) + buffer_result = await self.fetch_memories(observation=str(output)) print("HERE IS THE RESULT TASKS", str(buffer_result)) @@ -722,13 +726,11 @@ class EpisodicBuffer(EpisodicMemory): result_parsing = parser.parse(output) print("here is the parsing result", result_parsing) - memory = Memory(user_id=self.user_id) - await memory.async_init() # lookup_value = await memory._add_episodic_memory(observation=str(output), params=params) # now we clean up buffer memory - await memory._delete_buffer_memory() + await self.delete_memories() return lookup_value @@ -782,23 +784,10 @@ class Memory: async def async_create_long_term_memory(self, user_id, memory_id, index_name, db_type): # Perform asynchronous initialization steps if needed return LongTermMemory( - user_id=user_id, memory_id=memory_id, index_name=index_name, - db_type=db_type - ) - - async def 
async_init(self): - # Asynchronous initialization of LongTermMemory and ShortTermMemory - self.long_term_memory = await self.async_create_long_term_memory( user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type ) - async def async_create_short_term_memory(self, user_id, memory_id, index_name, db_type): - # Perform asynchronous initialization steps if needed - return ShortTermMemory( - user_id=user_id, memory_id=memory_id, index_name=index_name, db_type=db_type - ) - async def async_init(self): # Asynchronous initialization of LongTermMemory and ShortTermMemory self.long_term_memory = await self.async_create_long_term_memory( @@ -809,40 +798,48 @@ class Memory: user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type ) + + async def async_create_short_term_memory(self, user_id, memory_id, index_name, db_type): + # Perform asynchronous initialization steps if needed + return ShortTermMemory( + user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type + ) + + # self.short_term_memory = await ShortTermMemory.async_init( # user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type # ) async def _add_semantic_memory(self, semantic_memory: str, params: dict = None): - return await self.long_term_memory.semantic_memory._add_memories( + return await self.long_term_memory.semantic_memory.add_memories( semantic_memory=semantic_memory, params=params ) async def _fetch_semantic_memory(self, observation, params): - return await self.long_term_memory.semantic_memory._fetch_memories( + return await self.long_term_memory.semantic_memory.fetch_memories( observation=observation, params=params ) async def _delete_semantic_memory(self, params: str = None): - return await self.long_term_memory.semantic_memory._delete_memories( + return await self.long_term_memory.semantic_memory.delete_memories( params=params ) async def _add_episodic_memory(self, observation: str, params: dict = None): - return await self.long_term_memory.episodic_memory._add_memories( + return await self.long_term_memory.episodic_memory.add_memories( observation=observation, params=params ) async def _fetch_episodic_memory(self, observation, params: str = None): - return await self.long_term_memory.episodic_memory._fetch_memories( + return await self.long_term_memory.episodic_memory.fetch_memories( observation=observation, params=params ) async def _delete_episodic_memory(self, params: str = None): - return await self.long_term_memory.episodic_memory._delete_memories( + return await self.long_term_memory.episodic_memory.delete_memories( params=params ) @@ -850,18 +847,18 @@ class Memory: return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input, content=content, params=params) async def _add_buffer_memory(self, user_input: str, namespace: str = None, params: dict = None): - return await self.short_term_memory.episodic_buffer._add_memories(observation=user_input, namespace=namespace, + return await self.short_term_memory.episodic_buffer.add_memories(observation=user_input, params=params) async def _fetch_buffer_memory(self, user_input: str, namespace: str = None): - return await self.short_term_memory.episodic_buffer._fetch_memories(observation=user_input, namespace=namespace) + return await self.short_term_memory.episodic_buffer.fetch_memories(observation=user_input) async def _delete_buffer_memory(self, params: str = None): - return await 
self.short_term_memory.episodic_buffer._delete_memories( + return await self.short_term_memory.episodic_buffer.delete_memories( params=params ) async def _available_operations(self): - return await self.long_term_memory.episodic_buffer._available_operations() + return await self.long_term_memory.episodic_buffer.available_operations() @@ -886,7 +883,7 @@ async def main(): gg = await memory._run_buffer(user_input="i NEED TRANSLATION TO GERMAN ", content="i NEED TRANSLATION TO GERMAN ", params=params) print(gg) - # gg = await memory._delete_buffer_memory() + # gg = await memory._fetch_buffer_memory(user_input="i TO GERMAN ") # print(gg) episodic = """{ @@ -909,7 +906,9 @@ async def main(): ] }""" # - # ggur = await memory._add_episodic_memory(observation = episodic, params=params) + # ggur = await memory._delete_buffer_memory() + # print(ggur) + # ggur = await memory._add_buffer_memory(user_input = episodic, params=params) # print(ggur) # fff = await memory._fetch_episodic_memory(observation = "healthy diet") From fd9d4952eedfa6da0f83728f3aadf176ed457ce8 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Fri, 25 Aug 2023 11:48:43 +0200 Subject: [PATCH 11/12] Updates to the api + refactor --- level_2/Readme.md | 42 ++++++++++++++----- level_2/api.py | 26 +++++++++++- .../level_2_pdf_vectorstore__dlt_contracts.py | 32 +++++++------- 3 files changed, 71 insertions(+), 29 deletions(-) diff --git a/level_2/Readme.md b/level_2/Readme.md index 5b7115177..2b1d36675 100644 --- a/level_2/Readme.md +++ b/level_2/Readme.md @@ -24,21 +24,43 @@ Initial code lets you do three operations: ## Usage -The fast API endpoint accepts prompts and PDF files and returns a JSON object with the generated text. +The fast API endpoint accepts prompts and stores data with the help of the Memory Manager + +The types of memory are: Episodic, Semantic, Buffer + +Endpoint Overview +The Memory API provides the following endpoints: + +- /[memory_type]/add-memory (POST) +- /[memory_type]/fetch-memory (POST) +- /[memory_type]/delete-memory (POST) +- /available-buffer-actions (GET) +- /run-buffer (POST) +- /buffer/create-context (POST) + +Here is a payload example: -```curl - -X POST - -F "prompt=The quick brown fox" - -F "file=@/path/to/file.pdf" - http://localhost:8000/upload/ ``` - { "payload": { "user_id": "681", "session_id": "471", "model_speed": "slow", - "prompt": "Temperature=Cold;Food Type=Ice Cream", - "pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" + "prompt": "I want ", + "pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf", + "params": { + "version": "1.0", + "agreement_id": "AG123456", + "privacy_policy": "https://example.com/privacy", + "terms_of_service": "https://example.com/terms", + "format": "json", + "schema_version": "1.1", + "checksum": "a1b2c3d4e5f6", + "owner": "John Doe", + "license": "MIT", + "validity_start": "2023-08-01", + "validity_end": "2024-07-31" + } } -} \ No newline at end of file +} +``` \ No newline at end of file diff --git a/level_2/api.py b/level_2/api.py index b4b52ede8..2e5a8e9fe 100644 --- a/level_2/api.py +++ b/level_2/api.py @@ -242,7 +242,7 @@ async def available_buffer_actions( await Memory_.async_init() # memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None) - output = Memory_._available_operations() + output = await Memory_._available_operations() return JSONResponse(content={"response": output}, status_code=200) except Exception as e: @@ -263,13 
+263,35 @@ async def available_buffer_actions( await Memory_.async_init() # memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None) - output = Memory_._run_buffer(user_input=decoded_payload['prompt'], params=decoded_payload['params']) + output = await Memory_._run_buffer(user_input=decoded_payload['prompt'], params=decoded_payload['params']) return JSONResponse(content={"response": output}, status_code=200) except Exception as e: return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) +@app.post("/buffer/create-context", response_model=dict) +async def available_buffer_actions( + payload: Payload, + # files: List[UploadFile] = File(...), +): + try: + + decoded_payload = payload.payload + + Memory_ = Memory(user_id=decoded_payload['user_id']) + + await Memory_.async_init() + + # memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None) + output = await Memory_._create_buffer_context(user_input=decoded_payload['prompt'], params=decoded_payload['params']) + return JSONResponse(content={"response": output}, status_code=200) + + except Exception as e: + + return JSONResponse(content={"response": {"error": str(e)}}, status_code=503) + + # # # Process each uploaded PDF file # results = [] diff --git a/level_2/level_2_pdf_vectorstore__dlt_contracts.py b/level_2/level_2_pdf_vectorstore__dlt_contracts.py index 3e30d53ba..85598728c 100644 --- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py +++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py @@ -490,17 +490,12 @@ class EpisodicBuffer(BaseMemory): return ["translate", "structure", "load to database", "load to semantic memory", "load to episodic memory", "load to buffer"] - async def main_buffer(self, user_input=None, content=None, params=None): - - """AI buffer to understand user PDF query, prioritize memory info and process it based on available operations""" - + async def buffer_context(self, user_input=None, content=None, params=None): + """Generates the context to be used for the buffer and passed to the agent""" # we get a list of available operations for our buffer to consider # these operations are what we can do with the data, in the context of PDFs (load, translate, structure, etc) list_of_operations = await self.available_operations() - memory = Memory(user_id=self.user_id) - await memory.async_init() - try: # we delete all memories in the episodic buffer, so we can start fresh await self.delete_memories() @@ -530,8 +525,6 @@ class EpisodicBuffer(BaseMemory): context.append(frequency) # fix this so it actually filters - - else: # defaults to semantic search if we don't want to apply algorithms on the vectordb data memory = Memory(user_id=self.user_id) @@ -616,7 +609,15 @@ class EpisodicBuffer(BaseMemory): # Extract the list of tasks tasks_list = data["tasks"] + return tasks_list + async def main_buffer(self, user_input=None, content=None, params=None): + + """AI buffer to understand user PDF query, prioritize memory info and process it based on available operations""" + + memory = Memory(user_id=self.user_id) + await memory.async_init() + tasks_list = await self.buffer_context(user_input=user_input, content=content, params=params) result_tasks = [] for task in tasks_list: @@ -716,20 +717,13 @@ class EpisodicBuffer(BaseMemory): ) _input = prompt.format_prompt(query=user_input, steps=str(tasks_list), buffer=buffer_result) - # - # print("a few things to do like load episodic memory in a structured format") - # + # return "a few things to do like load episodic memory in a structured 
format" output = self.llm_base(_input.to_string()) - result_parsing = parser.parse(output) - print("here is the parsing result", result_parsing) - # lookup_value = await memory._add_episodic_memory(observation=str(output), params=params) - # now we clean up buffer memory - await self.delete_memories() return lookup_value @@ -857,6 +851,10 @@ class Memory: return await self.short_term_memory.episodic_buffer.delete_memories( params=params ) + async def _create_buffer_context(self, user_input: str, content: str = None, params:str=None): + return await self.short_term_memory.episodic_buffer.buffer_context( + user_input=user_input, content=content, params=params + ) async def _available_operations(self): return await self.long_term_memory.episodic_buffer.available_operations() From 916aca0129270f7e0dea8a576bc725c67b8a6baa Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Fri, 25 Aug 2023 12:11:12 +0200 Subject: [PATCH 12/12] Updates to the api + refactor --- level_2/tests/crud_test.py | 76 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 level_2/tests/crud_test.py diff --git a/level_2/tests/crud_test.py b/level_2/tests/crud_test.py new file mode 100644 index 000000000..403273481 --- /dev/null +++ b/level_2/tests/crud_test.py @@ -0,0 +1,76 @@ +import unittest +import asyncio + +import sys +sys.path.append("..") # Adds higher directory to python modules path. + +from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory +class TestMemory(unittest.TestCase): + + def setUp(self): + self.loop = asyncio.get_event_loop() + self.memory = Memory(user_id="123") + self.loop.run_until_complete(self.memory.async_init()) + + def test_add_fetch_delete_semantic_memory(self): + async def semantic_workflow(): + params = {"sample_param": "value"} + sample_memory = "sample semantic memory" + + # Add + await self.memory._add_semantic_memory(sample_memory, params=params) + # Fetch + fetched = await self.memory._fetch_semantic_memory(sample_memory, params) + fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text'] + self.assertIn(sample_memory, fetched_text) # Replace this with the appropriate validation + # Delete + await self.memory._delete_semantic_memory() + # Verify Deletion + after_delete = await self.memory._fetch_semantic_memory(sample_memory, params) + self.assertNotIn(sample_memory, after_delete) # Replace with the appropriate validation + + self.loop.run_until_complete(semantic_workflow()) + + def test_add_fetch_delete_episodic_memory(self): + async def episodic_workflow(): + params = {"sample_param": "value"} + sample_memory = """{ + "sample_key": "sample_value" + }""" + + # Add + await self.memory._add_episodic_memory(observation=sample_memory, params=params) + # Fetch + fetched = await self.memory._fetch_episodic_memory(sample_memory) + fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text'] + self.assertIn(sample_memory, fetched_text) # Replace this with the appropriate validation + # Delete + await self.memory._delete_episodic_memory() + # Verify Deletion + after_delete = await self.memory._fetch_episodic_memory(sample_memory) + self.assertNotIn(sample_memory, after_delete) # Replace with the appropriate validation + + self.loop.run_until_complete(episodic_workflow()) + + # def test_add_fetch_delete_buffer_memory(self): + # async def buffer_workflow(): + # params = {"sample_param": "value"} + # user_input = "sample buffer input" + # namespace = "sample_namespace" + # + # # Add + # await 
self.memory._add_buffer_memory(user_input=user_input, namespace=namespace, params=params) + # # Fetch + # fetched = await self.memory._fetch_buffer_memory(user_input, namespace) + # self.assertIn(user_input, fetched) # Replace this with the appropriate validation + # # Delete + # await self.memory._delete_buffer_memory() + # # Verify Deletion + # after_delete = await self.memory._fetch_buffer_memory(user_input, namespace) + # self.assertNotIn(user_input, after_delete) # Replace with the appropriate validation + # + # self.loop.run_until_complete(buffer_workflow()) + + +if __name__ == '__main__': + unittest.main()
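
The patches above only define the Memory API surface, so a minimal sketch of how the `/{memory_type}/add-memory` endpoint listed in the level_2/Readme.md changes might be exercised is shown below. It reuses the payload fields from the README example; the prompt text, the trimmed `params` block, and the assumption of a local FastAPI server on port 8000 (as in the curl example the README replaces) are illustrative and not part of the patch series.

```python
# Illustrative client call; not part of the patch series above.
# Assumes the FastAPI app from level_2/api.py is running locally on port 8000.
import requests

payload = {
    "payload": {
        "user_id": "681",
        "session_id": "471",
        "model_speed": "slow",
        "prompt": "I want a summary of the attached contract",  # illustrative prompt
        "pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
        "params": {
            "version": "1.0",
            "owner": "John Doe",
        },
    }
}

# "episodic" can be swapped for "semantic" or "buffer" to reach the other
# /{memory_type}/add-memory routes listed in the README.
response = requests.post("http://localhost:8000/episodic/add-memory", json=payload)
print(response.status_code, response.json())
```

The tests added in the final patch (level_2/tests/crud_test.py) use the standard unittest runner, so they can presumably be executed with `python -m unittest` from the repository root; the patch itself does not document an exact invocation, so that command is an assumption.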