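Added extended logic: PDF-URL ingestion in the add-memory endpoint and contract metadata params (version, agreement_id, checksum, validity dates, etc.) for vector-store memories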

2023-08-23 00:14:40 +02:00 · 2023-08-23 00:14:40 +02:00 · 39140620f6
commit 39140620f6
parent 8aa75481e4
2 changed files with 389 additions and 220 deletions
--- a/level_2/api.py
+++ b/level_2/api.py
@ -1,3 +1,5 @@
 from io import BytesIO
 from langchain.document_loaders import PyPDFLoader
 from level_2_pdf_vectorstore__dlt_contracts import Memory
@ -27,7 +29,7 @@ from dotenv import load_dotenv
 load_dotenv()
-
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
 app = FastAPI(debug=True)
@ -63,82 +65,73 @@ def health_check():
 #curl -X POST -H "Content-Type: application/json" -d '{"data": "YourPayload"}' -F "files=@/path/to/your/pdf/file.pdf" http://127.0.0.1:8000/upload/
 from fastapi import FastAPI, UploadFile, File
 import requests
 import os
 import json
 app = FastAPI()
 from io import BytesIO
 class Payload(BaseModel):
    payload: Dict[str, Any]
-@app.post("/upload/", response_model=dict)
+# @app.post("/upload/", response_model=dict)
-async def upload_pdf_and_payload(
+# async def upload_pdf_and_payload(
-        payload: Payload,
+#         payload: Payload,
-        # files: List[UploadFile] = File(...),
+#         # files: List[UploadFile] = File(...),
-):
+# ):
-    try:
+#     try:
-        # Process the payload
+#         # Process the payload
-        decoded_payload = payload.payload
+#         decoded_payload = payload.payload
-    # except:
+#     # except:
-    #     pass
+#     #     pass
-    #
+#     #
-    # return JSONResponse(content={"response": decoded_payload}, status_code=200)
+#     # return JSONResponse(content={"response": decoded_payload}, status_code=200)
-
+#
-        # Download the remote PDF if URL is provided
+#         # Download the remote PDF if URL is provided
-        if 'pdf_url' in decoded_payload:
+#         if 'pdf_url' in decoded_payload:
-            pdf_response = requests.get(decoded_payload['pdf_url'])
+#             pdf_response = requests.get(decoded_payload['pdf_url'])
-            pdf_content = pdf_response.content
+#             pdf_content = pdf_response.content
-
+#
-            logging.info("Downloaded PDF from URL")
+#             logging.info("Downloaded PDF from URL")
-
+#
-            # Create an in-memory file-like object for the PDF content
+#             # Create an in-memory file-like object for the PDF content
-            pdf_stream = BytesIO(pdf_content)
+#             pdf_stream = BytesIO(pdf_content)
-
+#
-            contents = pdf_stream.read()
+#             contents = pdf_stream.read()
-
+#
-            tmp_location = os.path.join('/tmp', "tmp.pdf")
+#             tmp_location = os.path.join('/tmp', "tmp.pdf")
-            with open(tmp_location, 'wb') as tmp_file:
+#             with open(tmp_location, 'wb') as tmp_file:
-                tmp_file.write(contents)
+#                 tmp_file.write(contents)
-
+#
-            logging.info("Wrote PDF from URL")
+#             logging.info("Wrote PDF from URL")
-
+#
-            # Process the PDF using PyPDFLoader
+#             # Process the PDF using PyPDFLoader
-            loader = PyPDFLoader(tmp_location)
+#             loader = PyPDFLoader(tmp_location)
-            pages = loader.load_and_split()
+#             pages = loader.load_and_split()
-            logging.info(" PDF split into pages")
+#             logging.info(" PDF split into pages")
-            Memory_ = Memory(index_name="my-agent", user_id='555' )
+#             Memory_ = Memory(index_name="my-agent", user_id='555' )
-            await Memory_.async_init()
+#             await Memory_.async_init()
-            Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content =pages)
+#             Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content =pages)
-
+#
-
+#
-            # Run the buffer
+#             # Run the buffer
-            response = Memory_._run_buffer(user_input="I want to get a schema for my data")
+#             response = Memory_._run_buffer(user_input="I want to get a schema for my data")
-            return JSONResponse(content={"response": response}, status_code=200)
+#             return JSONResponse(content={"response": response}, status_code=200)
-
+#
-            #to do: add the user id to the payload
+#             #to do: add the user id to the payload
-            #to do add the raw pdf to payload
+#             #to do add the raw pdf to payload
-            # bb = await Memory_._run_buffer(user_input=decoded_payload['prompt'])
+#             # bb = await Memory_._run_buffer(user_input=decoded_payload['prompt'])
-            # print(bb)
+#             # print(bb)
-
+#
-
+#
-    except Exception as e:
+#     except Exception as e:
-
+#
-        return {"error": str(e)}
+#         return {"error": str(e)}
-            # Here you can perform your processing on the PDF contents
+#             # Here you can perform your processing on the PDF contents
-            # results.append({"filename": file.filename, "size": len(contents)})
+#             # results.append({"filename": file.filename, "size": len(contents)})
-
+#
-            # Append the in-memory file to the files list
+#             # Append the in-memory file to the files list
-            # files.append(UploadFile(pdf_stream, filename="downloaded.pdf"))
+#             # files.append(UploadFile(pdf_stream, filename="downloaded.pdf"))
-
+#
 def memory_factory(memory_type):
    load_dotenv()
    class Payload(BaseModel):
        payload: Dict[str, Any]
    @app.post("/{memory_type}/add-memory", response_model=dict)
@ -148,16 +141,40 @@ def memory_factory(memory_type):
    ):
        try:
            logging.info(" Init PDF processing")
            decoded_payload = payload.payload
-            Memory_ = Memory( user_id='555')
+            if 'pdf_url' in decoded_payload:
                pdf_response = requests.get(decoded_payload['pdf_url'])
                pdf_content = pdf_response.content
-            await Memory_.async_init()
+                logging.info("Downloaded PDF from URL")
-            memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None)
+                # Create an in-memory file-like object for the PDF content
-            output= memory_class(observation=decoded_payload['prompt'])
+                pdf_stream = BytesIO(pdf_content)
-            return JSONResponse(content={"response": output}, status_code=200)
+
                contents = pdf_stream.read()
                tmp_location = os.path.join('/tmp', "tmp.pdf")
                with open(tmp_location, 'wb') as tmp_file:
                    tmp_file.write(contents)
                logging.info("Wrote PDF from URL")
                # Process the PDF using PyPDFLoader
                loader = PyPDFLoader(tmp_location)
                # pages = loader.load_and_split()
                logging.info(" PDF split into pages")
                Memory_ = Memory(user_id='555')
                await Memory_.async_init()
                memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None)
                output= await memory_class(observation=str(loader))
                return JSONResponse(content={"response": output}, status_code=200)
        except Exception as e:
--- a/level_2/level_2_pdf_vectorstore__dlt_contracts.py
+++ b/level_2/level_2_pdf_vectorstore__dlt_contracts.py
@ -1,4 +1,4 @@
-#Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client
+# Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client
 import dlt
 from langchain import PromptTemplate, LLMChain
@ -15,7 +15,7 @@ from deep_translator import (GoogleTranslator)
 from langchain.chat_models import ChatOpenAI
 from langchain.schema import LLMResult, HumanMessage
 from langchain.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler
-
+from pydantic import BaseModel, Field, parse_obj_as
 from langchain.memory import VectorStoreRetrieverMemory
 from marvin import ai_classifier
 from enum import Enum
@ -29,14 +29,13 @@ from langchain.tools import tool
 from langchain.vectorstores import Weaviate
 import uuid
 from dotenv import load_dotenv
 load_dotenv()
 from pathlib import Path
 from langchain import OpenAI, LLMMathChain
 from langchain.chat_models import ChatOpenAI
 from langchain.prompts import ChatPromptTemplate
 import os
 from datetime import datetime
@ -56,12 +55,17 @@ from langchain.schema import Document, SystemMessage, HumanMessage
 from langchain.vectorstores import Weaviate
 import weaviate
 import uuid
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
 class MyCustomSyncHandler(BaseCallbackHandler):
    def on_llm_new_token(self, token: str, **kwargs) -> None:
        print(f"Sync handler being called in a `thread_pool_executor`: token: {token}")
 class MyCustomAsyncHandler(AsyncCallbackHandler):
    """Async callback handler that can be used to handle callbacks from langchain."""
@ -80,14 +84,17 @@ class MyCustomAsyncHandler(AsyncCallbackHandler):
        await asyncio.sleep(0.3)
        print("Hi! I just woke up. Your llm is ending")
 class VectorDB:
    OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0))
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
-    def __init__(self, user_id: str, index_name: str, memory_id:str, ltm_memory_id:str='00000', st_memory_id:str='0000', buffer_id:str='0000', db_type: str = "pinecone",  namespace:str = None):
+
    def __init__(self, user_id: str, index_name: str, memory_id: str, ltm_memory_id: str = '00000',
                 st_memory_id: str = '0000', buffer_id: str = '0000', db_type: str = "pinecone", namespace: str = None):
        self.user_id = user_id
        self.index_name = index_name
        self.db_type = db_type
-        self.namespace=namespace
+        self.namespace = namespace
        self.memory_id = memory_id
        self.ltm_memory_id = ltm_memory_id
        self.st_memory_id = st_memory_id
@ -118,7 +125,7 @@ class VectorDB:
        )
        return vectorstore
-    def init_weaviate_client(self, namespace:str):
+    def init_weaviate_client(self, namespace: str):
        embeddings = OpenAIEmbeddings()
        auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY'))
        client = weaviate.Client(
@ -131,7 +138,7 @@ class VectorDB:
        )
        return client
-    def init_weaviate(self, namespace:str):
+    def init_weaviate(self, namespace: str):
        embeddings = OpenAIEmbeddings()
        auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY'))
        client = weaviate.Client(
@ -152,15 +159,12 @@ class VectorDB:
        )
        return retriever
-    async def add_memories(self, observation: str,  page: str = "", source: str = ""):
+    async def add_memories(self, observation: str, params: dict = None):
        if self.db_type == "pinecone":
            # Update Pinecone memories here
            vectorstore: Pinecone = Pinecone.from_existing_index(
                index_name=self.index_name, embedding=OpenAIEmbeddings(), namespace=self.namespace
            )
            retriever = vectorstore.as_retriever()
            retriever.add_documents(
                [
@ -170,8 +174,17 @@ class VectorDB:
                            "inserted_at": datetime.now(),
                            "text": observation,
                            "user_id": self.user_id,
-                            "page": page,
+                            "version": params.get('version', None) or "",
-                            "source": source
+                            "agreement_id": params.get('agreement_id', None) or "",
                            "privacy_policy": params.get('privacy_policy', None) or "",
                            "terms_of_service": params.get('terms_of_service', None) or "",
                            "format": params.get('format', None) or "",
                            "schema_version": params.get('schema_version', None) or "",
                            "checksum": params.get('checksum', None) or "",
                            "owner": params.get('owner', None) or "",
                            "license": params.get('license', None) or "",
                            "validity_start": params.get('validity_start', None) or "",
                            "validity_end": params.get('validity_end', None) or ""
                        },
                        namespace=self.namespace,
                    )
@ -180,10 +193,9 @@ class VectorDB:
        elif self.db_type == "weaviate":
            # Update Weaviate memories here
            print(self.namespace)
-            retriever = self.init_weaviate( self.namespace)
+            retriever = self.init_weaviate(self.namespace)
-
+            return retriever.add_documents([
            return  retriever.add_documents([
                Document(
                    metadata={
                        "text": observation,
@ -192,18 +204,30 @@ class VectorDB:
                        "ltm_memory_id": str(self.ltm_memory_id),
                        "st_memory_id": str(self.st_memory_id),
                        "buffer_id": str(self.buffer_id),
                        "version": params.get('version', None) or "",
                        "agreement_id": params.get('agreement_id', None) or "",
                        "privacy_policy": params.get('privacy_policy', None) or "",
                        "terms_of_service": params.get('terms_of_service', None) or "",
                        "format": params.get('format', None) or "",
                        "schema_version": params.get('schema_version', None) or "",
                        "checksum": params.get('checksum', None) or "",
                        "owner": params.get('owner', None) or "",
                        "license": params.get('license', None) or "",
                        "validity_start": params.get('validity_start', None) or "",
                        "validity_end": params.get('validity_end', None) or ""
                        # **source_metadata,
                    },
                    page_content=observation,
                )]
            )
    # def get_pinecone_vectorstore(self, namespace: str) -> pinecone.VectorStore:
    #     return Pinecone.from_existing_index(
    #         index_name=self.index, embedding=OpenAIEmbeddings(), namespace=namespace
    #     )
-    async def fetch_memories(self, observation: str, namespace:str, params = None):
+    async def fetch_memories(self, observation: str, namespace: str, params: dict = None):
        if self.db_type == "pinecone":
            # Fetch Pinecone memories here
            pass
@ -227,22 +251,57 @@ class VectorDB:
            print(str(datetime.now()))
            print(observation)
            if namespace is None:
-                namespace= self.namespace
+                namespace = self.namespace
-            params_user_id= {
+            params_user_id = {
-              "path": ["user_id"],
+                "path": ["user_id"],
-              "operator": "Like",
+                "operator": "Like",
-              "valueText": self.user_id
+                "valueText": self.user_id
            }
            if params:
-                query_output = client.query.get(namespace, ["text","user_id", "memory_id", "ltm_memory_id", "st_memory_id", "buffer_id"]).with_where(params).with_additional(['id','creationTimeUnix','lastUpdateTimeUnix']).with_where(params_user_id).do()
+                query_output = client.query.get(namespace, ["text"
                    , "user_id"
                    , "memory_id"
                    , "ltm_memory_id"
                    , "st_memory_id"
                    , "buffer_id"
                    , "version",
                                                            "agreement_id",
                                                            "privacy_policy",
                                                            "terms_of_service",
                                                            "format",
                                                            "schema_version",
                                                            "checksum",
                                                            "owner",
                                                            "license",
                                                            "validity_start",
                                                            "validity_end"]).with_where(params).with_additional(
                    ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do()
                return query_output
            else:
-                query_output = client.query.get(namespace, ["text","user_id", "memory_id", "ltm_memory_id","st_memory_id", "buffer_id"]).with_additional(['id','creationTimeUnix','lastUpdateTimeUnix']).with_where(params_user_id).do()
+                query_output = client.query.get(namespace, ["text",
                                                            "user_id",
                                                            "memory_id",
                                                            "ltm_memory_id",
                                                            "st_memory_id",
                                                            "buffer_id",
                                                            "version",
                                                            "agreement_id",
                                                            "privacy_policy",
                                                            "terms_of_service",
                                                            "format",
                                                            "schema_version",
                                                            "checksum",
                                                            "owner",
                                                            "license",
                                                            "validity_start",
                                                            "validity_end"
                                                            ]).with_additional(
                    ['id', 'creationTimeUnix', 'lastUpdateTimeUnix', "score"]).with_where(params_user_id).do()
                return query_output
-    async def delete_memories(self, params:dict = None):
+    async def delete_memories(self, params: dict = None):
        client = self.init_weaviate_client(self.namespace)
        if params:
            where_filter = {
@ -256,7 +315,7 @@ class VectorDB:
                where=where_filter,
            )
        else:
-            #Delete all objects
+            # Delete all objects
            return client.batch.delete_objects(
                class_name=self.namespace,
@ -267,8 +326,9 @@ class VectorDB:
                }
            )
-    def update_memories(self, observation, namespace:str,params:dict = None):
+    def update_memories(self, observation, namespace: str, params: dict = None):
        client = self.init_weaviate_client(self.namespace)
        client.data_object.update(
            data_object={
                "text": observation,
@ -277,6 +337,17 @@ class VectorDB:
                "ltm_memory_id": str(self.ltm_memory_id),
                "st_memory_id": str(self.st_memory_id),
                "buffer_id": str(self.buffer_id),
                "version": params.get('version', None) or "",
                "agreement_id": params.get('agreement_id', None) or "",
                "privacy_policy": params.get('privacy_policy', None) or "",
                "terms_of_service": params.get('terms_of_service', None) or "",
                "format": params.get('format', None) or "",
                "schema_version": params.get('schema_version', None) or "",
                "checksum": params.get('checksum', None) or "",
                "owner": params.get('owner', None) or "",
                "license": params.get('license', None) or "",
                "validity_start": params.get('validity_start', None) or "",
                "validity_end": params.get('validity_end', None) or ""
                # **source_metadata,
@ -288,33 +359,39 @@ class VectorDB:
        return
 class SemanticMemory:
-    def __init__(self, user_id: str, memory_id:str, ltm_memory_id:str, index_name: str, db_type:str="weaviate", namespace:str="SEMANTICMEMORY"):
+    def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate",
                 namespace: str = "SEMANTICMEMORY"):
        # Add any semantic memory-related attributes or setup here
-        self.user_id=user_id
+        self.user_id = user_id
        self.index_name = index_name
        self.namespace = namespace
        self.semantic_memory_id = str(uuid.uuid4())
        self.memory_id = memory_id
        self.ltm_memory_id = ltm_memory_id
-        self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace)
+        self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id,
                                  index_name=index_name, db_type=db_type, namespace=self.namespace)
        self.db_type = db_type
-
+    async def _add_memories(self, semantic_memory: str = "None", params: dict = None) -> list[str]:
    async def _add_memories(self, semantic_memory: str="None") ->list[str]:
        """Update semantic memory for the user"""
        if self.db_type == "weaviate":
-            out = await self.vector_db.add_memories( observation = semantic_memory)
+            text_splitter = RecursiveCharacterTextSplitter(
-            return out
+                chunk_size=400,
                chunk_overlap=20,
                length_function=len,
                is_separator_regex=False,
            )
            texts = text_splitter.create_documents([semantic_memory])
            for text in texts:
                out = await self.vector_db.add_memories(observation=text.page_content, params=params)
                return out
        elif self.db_type == "pinecone":
            pass
-
+    async def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]:
    async def _fetch_memories(self, observation: str,params:str=None) -> Coroutine[Any, Any, Any]:
        """Fetch related characteristics, preferences or dislikes for a user."""
        # self.init_pinecone(index_name=self.index)
@ -325,42 +402,42 @@ class SemanticMemory:
        elif self.db_type == "pinecone":
            pass
-    async def _delete_memories(self,params:str=None) -> Coroutine[Any, Any, Any]:
+    async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]:
        """Fetch related characteristics, preferences or dislikes for a user."""
        # self.init_pinecone(index_name=self.index)
        if self.db_type == "weaviate":
-            return await self.vector_db.delete_memories( params=params)
+            return await self.vector_db.delete_memories(params=params)
        elif self.db_type == "pinecone":
            pass
 class EpisodicMemory:
-    def __init__(self, user_id: str, memory_id:str, ltm_memory_id:str, index_name: str, db_type:str="weaviate", namespace:str="EPISODICMEMORY"):
+    def __init__(self, user_id: str, memory_id: str, ltm_memory_id: str, index_name: str, db_type: str = "weaviate",
                 namespace: str = "EPISODICMEMORY"):
        # Add any semantic memory-related attributes or setup here
-        self.user_id=user_id
+        self.user_id = user_id
        self.index_name = index_name
        self.namespace = namespace
        self.episodic_memory_id = str(uuid.uuid4())
        self.memory_id = memory_id
        self.ltm_memory_id = ltm_memory_id
-        self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace)
+        self.vector_db = VectorDB(user_id=user_id, memory_id=self.memory_id, ltm_memory_id=self.ltm_memory_id,
                                  index_name=index_name, db_type=db_type, namespace=self.namespace)
        self.db_type = db_type
-
+    async def _add_memories(self, observation: str = None, params: dict = None) -> list[str]:
    async def _add_memories(self ,observation:str=None) -> list[str]:
        """Update semantic memory for the user"""
        if self.db_type == "weaviate":
-            return await self.vector_db.add_memories( observation = observation)
+            return await self.vector_db.add_memories(observation=observation, params=params)
        elif self.db_type == "pinecone":
            pass
-
+    def _fetch_memories(self, observation: str, params: str = None) -> Coroutine[Any, Any, Any]:
    def _fetch_memories(self, observation: str,params:str=None) -> Coroutine[Any, Any, Any]:
        """Fetch related characteristics, preferences or dislikes for a user."""
        # self.init_pinecone(index_name=self.index)
@ -370,19 +447,22 @@ class EpisodicMemory:
        elif self.db_type == "pinecone":
            pass
-    async def _delete_memories(self, params:str=None) -> Coroutine[Any, Any, Any]:
+
    async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]:
        """Fetch related characteristics, preferences or dislikes for a user."""
        # self.init_pinecone(index_name=self.index)
        if self.db_type == "weaviate":
-            return await self.vector_db.delete_memories( params=params)
+            return await self.vector_db.delete_memories(params=params)
        elif self.db_type == "pinecone":
            pass
 class LongTermMemory:
-    def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str=None, db_type:str="weaviate"):
+    def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None,
                 db_type: str = "weaviate"):
        self.user_id = user_id
        self.memory_id = memory_id
        self.ltm_memory_id = str(uuid.uuid4())
@ -390,14 +470,17 @@ class LongTermMemory:
        self.namespace = namespace
        self.db_type = db_type
        # self.episodic_memory = EpisodicMemory()
-        self.semantic_memory = SemanticMemory(user_id = self.user_id, memory_id=self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=self.index_name, db_type=self.db_type)
+        self.semantic_memory = SemanticMemory(user_id=self.user_id, memory_id=self.memory_id,
                                              ltm_memory_id=self.ltm_memory_id, index_name=self.index_name,
                                              db_type=self.db_type)
        self.episodic_memory = EpisodicMemory(user_id=self.user_id, memory_id=self.memory_id,
                                              ltm_memory_id=self.ltm_memory_id, index_name=self.index_name,
                                              db_type=self.db_type)
 class ShortTermMemory:
-    def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str=None, db_type:str="weaviate"):
+    def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None, namespace: str = None,
                 db_type: str = "weaviate"):
        # Add any short-term memory-related attributes or setup here
        self.user_id = user_id
        self.memory_id = memory_id
@ -405,12 +488,13 @@ class ShortTermMemory:
        self.db_type = db_type
        self.stm_memory_id = str(uuid.uuid4())
        self.index_name = index_name
-        self.episodic_buffer = EpisodicBuffer(user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type)
+        self.episodic_buffer = EpisodicBuffer(user_id=self.user_id, memory_id=self.memory_id,
-
+                                              index_name=self.index_name, db_type=self.db_type)
 class EpisodicBuffer:
-    def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str='EPISODICBUFFER', db_type:str="weaviate"):
+    def __init__(self, user_id: str = "676", memory_id: str = None, index_name: str = None,
                 namespace: str = 'EPISODICBUFFER', db_type: str = "weaviate"):
        # Add any short-term memory-related attributes or setup here
        self.user_id = user_id
        self.memory_id = memory_id
@ -418,7 +502,7 @@ class EpisodicBuffer:
        self.db_type = db_type
        self.st_memory_id = "blah"
        self.index_name = index_name
-        self.llm= ChatOpenAI(
+        self.llm = ChatOpenAI(
            temperature=0.0,
            max_tokens=1200,
            openai_api_key=os.environ.get('OPENAI_API_KEY'),
@ -426,10 +510,8 @@ class EpisodicBuffer:
            callbacks=[MyCustomSyncHandler(), MyCustomAsyncHandler()],
        )
        # self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, st_memory_id = self.st_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace)
        def _compute_weights(self, context: str):
            """Computes the weights for the buffer"""
            pass
@ -456,57 +538,58 @@ class EpisodicBuffer:
    #     json_data = json.dumps(chain_result)
    #     return json_data
-    async def _fetch_memories(self, observation: str,namespace:str) ->  str:
+    async def _fetch_memories(self, observation: str, namespace: str) -> str:
        vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id,
                             index_name=self.index_name, db_type=self.db_type, namespace=namespace)
        query = await vector_db.fetch_memories(observation=observation)
        return query
-    async def _add_memories(self, observation: str,namespace:str):
+    async def _add_memories(self, observation: str, namespace: str, params: dict = None):
        vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id,
                             index_name=self.index_name, db_type=self.db_type, namespace=namespace)
-
+        query = await vector_db.add_memories(observation, params=params)
        query = await vector_db.add_memories(observation)
        return query
-    async def _delete_memories(self, params:str=None) -> Coroutine[Any, Any, Any]:
+    async def _delete_memories(self, params: str = None) -> Coroutine[Any, Any, Any]:
        """Fetch related characteristics, preferences or dislikes for a user."""
        # self.init_pinecone(index_name=self.index)
        if self.db_type == "weaviate":
-            return await self.vector_db.delete_memories( params=params)
+            return await self.vector_db.delete_memories(params=params)
        elif self.db_type == "pinecone":
            pass
-    async def freshness(self, observation: str,namespace:str) -> str:
+    # async def freshness(self, observation: str,namespace:str) -> str:
-        """Freshness - Score between 1 and 5  on how often was the information processed in episodic memory in the past"""
+    #     """Freshness - Score between 1 and 5  on how often was the information processed in episodic memory in the past"""
    #
    #     memory = Memory(user_id=self.user_id)
    #     await memory.async_init()
    #
    #     # gg = await memory._run_buffer(user_input= "bla", content = "blablabla ")
    #     # print(gg)
    #
    #
    #
    #     ggur = await memory._fetch_episodic_memory(observation=observation)
    #     print(ggur)
-        memory = Memory(user_id=self.user_id)
+    # @ai_classifier
-        await memory.async_init()
+    # class MemoryRoute(Enum):
    #     """Represents classifer for semantic fetching of memories"""
    #
    #     storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY"
    #     raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER"
    #     raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY"
    #     long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY"
    #     raw_information_to_store_as_events = "EVENTBUFFER"
    #
    # namespace= MemoryRoute(observation)
-        # gg = await memory._run_buffer(user_input= "bla", content = "blablabla ")
+    # return ggur
        # print(gg)
        ggur = await memory._fetch_episodic_memory(observation="bla bla bla")
        print(ggur)
        # @ai_classifier
        # class MemoryRoute(Enum):
        #     """Represents classifer for semantic fetching of memories"""
        #
        #     storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY"
        #     raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER"
        #     raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY"
        #     long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY"
        #     raw_information_to_store_as_events = "EVENTBUFFER"
        #
        # namespace= MemoryRoute(observation)
        return ggur
    async def encoding(self, document: str, namespace: str = "EPISODICBUFFER") -> None:
        """Encoding for the buffer, stores raw data in the buffer
@ -522,21 +605,90 @@ class EpisodicBuffer:
        # Here we define the user prompt and the structure of the output we desire
        # prompt = output[0].page_content
        # file_upload
        #
        if content is not None:
-            #We need to encode the content. Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer
+            # operations -> translate, structure, load to db
-            output_translated = GoogleTranslator(source='auto', target='en').translate(text=content)
+
-            await self.encoding(output_translated)
+            list_of_operations = ["translate", "structure", "load to db"]
-            freshness_score =await  self.freshness(output_translated, namespace="EPISODICBUFFER")
+
-            print(freshness_score)
+            prompt_filter = ChatPromptTemplate.from_template(
                "Filter and remove uneccessary information that is not relevant in the user query {query}")
            chain_filter = prompt_filter | self.llm
            output = await chain_filter.ainvoke({"query": user_input})
            class Task(BaseModel):
                """Schema for an individual task."""
                task_order: str = Field(..., description="The order at which the task needs to be performed")
                task_name: str = Field(None, description="The task that needs to be performed")
                operation: str = Field(None, description="The operation to be performed")
            class TaskList(BaseModel):
                """Schema for the record containing a list of tasks."""
                tasks: List[Task] = Field(..., description="List of tasks")
            prompt_filter_chunk = f" Based on available operations {list_of_operations} determine only the relevant list of steps and operations sequentially based {output}"
            # chain_filter_chunk = prompt_filter_chunk | self.llm.bind(function_call={"TaskList": "tasks"}, functions=TaskList)
            # output_chunk = await chain_filter_chunk.ainvoke({"query": output, "list_of_operations": list_of_operations})
            prompt_msgs = [
                SystemMessage(
                    content="You are a world class algorithm for decomposing prompts into steps and operations and choosing relevant ones"
                ),
                HumanMessage(content="Decompose based on the following prompt:"),
                HumanMessagePromptTemplate.from_template("{input}"),
                HumanMessage(content="Tips: Make sure to answer in the correct format"),
                HumanMessage(content="Tips: Only choose actions that are relevant to the user query and ignore others")
            ]
            prompt_ = ChatPromptTemplate(messages=prompt_msgs)
            chain = create_structured_output_chain(TaskList, self.llm, prompt_, verbose=True)
            from langchain.callbacks import get_openai_callback
            with get_openai_callback() as cb:
                output = await chain.arun(input=prompt_filter_chunk, verbose=True)
                print(cb)
            # output = json.dumps(output)
            my_object = parse_obj_as(TaskList, output)
            print("HERE IS THE OUTPUT", my_object.json())
            data = json.loads(my_object.json())
            # Extract the list of tasks
            tasks_list = data["tasks"]
            for task in tasks_list:
                class TranslateText(BaseModel):
                    observation: str = Field(
                        description="observation we want to translate"
                    )
                @tool("translate_to_en", args_schema=TranslateText, return_direct=True)
                def translate_to_en(observation, args_schema=TranslateText):
                    """Translate to English"""
                    out = GoogleTranslator(source='auto', target='en').translate(text=observation)
                    return out
                agent = initialize_agent(
                    llm=self.llm,
                    tools=[translate_to_en],
                    agent=AgentType.OPENAI_FUNCTIONS,
                    verbose=True,
                )
                agent.run(task)
            # We need to encode the content. Note, this is not comp-sci encoding, but rather encoding in the sense of storing the content in the buffer
            # output_translated = GoogleTranslator(source='auto', target='en').translate(text=content)
            # await self.encoding(output_translated)
            # freshness_score =await self.freshness(output_translated, namespace="EPISODICBUFFER")
            # print(freshness_score)
            # shows how much the data is relevant for the user, provided by the user in a separate step, starts at 0
-            user_relevance_score ="0"
+            user_relevance_score = "0"
            # similarity score between the user input and the content already available in the buffer
-
+            # write this to episodic memory
            # prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}")
            # chain_filter = prompt_filter | self.llm
@ -546,12 +698,12 @@ class EpisodicBuffer:
        if content is None:
            # Sensory and Linguistic Processing
-            prompt_filter = ChatPromptTemplate.from_template("Filter and remove uneccessary information that is not relevant in the user query {query}")
+            prompt_filter = ChatPromptTemplate.from_template(
                "Filter and remove uneccessary information that is not relevant in the user query {query}")
            chain_filter = prompt_filter | self.llm
            output = await chain_filter.ainvoke({"query": user_input})
            translation = GoogleTranslator(source='auto', target='en').translate(text=output.content)
            def top_down_processing():
                """Top-down processing"""
                pass
@ -560,29 +712,19 @@ class EpisodicBuffer:
                """Bottom-up processing"""
                pass
            def interactive_processing():
                """interactive processing"""
                pass
            working_memory_activation = "bla"
-
+            prompt_chunk = ChatPromptTemplate.from_template(
-
+                "Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. Be brief")
            working_memory_activation =  "bla"
            prompt_chunk = ChatPromptTemplate.from_template("Can you break down the instruction 'Structure a PDF and load it into duckdb' into smaller tasks or actions? Return only tasks or actions. Be brief")
            chain_chunk = prompt_chunk | self.llm
            output_chunks = await chain_chunk.ainvoke({"query": output.content})
            print(output_chunks.content)
        # vectorstore = Weaviate.from_documents(documents, embeddings, client=client, by_text=False)
        # retriever = WeaviateHybridSearchRetriever(
        #     client=client,
@ -598,7 +740,6 @@ class EpisodicBuffer:
        # query = vector_db.
        # retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))
        # memory = VectorStoreRetrieverMemory(retriever=retriever)
        # class PromptWrapper(BaseModel):
@ -728,10 +869,8 @@ class EpisodicBuffer:
        # return output
-
+# DEFINE STM
-
+# DEFINE LTM
 #DEFINE STM
 #DEFINE LTM
 class Memory:
    load_dotenv()
@ -739,7 +878,7 @@ class Memory:
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
    def __init__(self, user_id: str = "676", index_name: str = None, knowledge_source: str = None,
-                 knowledge_type: str = None, db_type:str="weaviate", namespace:str=None) -> None:
+                 knowledge_type: str = None, db_type: str = "weaviate", namespace: str = None) -> None:
        self.user_id = user_id
        self.index_name = index_name
        self.db_type = db_type
@ -752,12 +891,13 @@ class Memory:
        load_dotenv()
    # Asynchronous factory function for creating LongTermMemory
-    async def async_create_long_term_memory(self,user_id, memory_id, index_name, namespace, db_type):
+    async def async_create_long_term_memory(self, user_id, memory_id, index_name, namespace, db_type):
        # Perform asynchronous initialization steps if needed
        return LongTermMemory(
            user_id=user_id, memory_id=memory_id, index_name=index_name,
            namespace=namespace, db_type=db_type
        )
    async def async_init(self):
        # Asynchronous initialization of LongTermMemory and ShortTermMemory
        self.long_term_memory = await self.async_create_long_term_memory(
@ -785,9 +925,9 @@ class Memory:
        #     user_id=self.user_id, memory_id=self.memory_id, index_name=self.index_name, db_type=self.db_type
        # )
-    async def _add_semantic_memory(self, semantic_memory:str):
+    async def _add_semantic_memory(self, semantic_memory: str, params: dict = None):
        return await self.long_term_memory.semantic_memory._add_memories(
-            semantic_memory=semantic_memory
+            semantic_memory=semantic_memory, params=params
        )
@ -795,67 +935,79 @@ class Memory:
        return await self.long_term_memory.semantic_memory._fetch_memories(
            observation=observation, params=params
        )
-    async def _delete_semantic_memory(self,  params:str=None):
+
    async def _delete_semantic_memory(self, params: str = None):
        return await self.long_term_memory.semantic_memory._delete_memories(
            params=params
        )
-    async def _add_episodic_memory(self, observation:str):
+    async def _add_episodic_memory(self, observation: str, params: dict = None):
        return await self.long_term_memory.episodic_memory._add_memories(
-            observation=observation
+            observation=observation, params=params
        )
-    async def _fetch_episodic_memory(self, observation, params:str=None):
+    async def _fetch_episodic_memory(self, observation, params: str = None):
        return await self.long_term_memory.episodic_memory._fetch_memories(
            observation=observation, params=params
        )
-    async def _delete_episodic_memory(self, params:str=None):
+    async def _delete_episodic_memory(self, params: str = None):
        return await self.long_term_memory.episodic_memory._delete_memories(
-             params=params
+            params=params
        )
-    async def _run_buffer(self, user_input:str, content:str=None):
+    async def _run_buffer(self, user_input: str, content: str = None):
        return await self.short_term_memory.episodic_buffer.main_buffer(user_input=user_input, content=content)
    async def _add_buffer_memory(self, user_input: str, namespace: str = None, params: dict = None):
        return await self.short_term_memory.episodic_buffer._add_memories(observation=user_input, namespace=namespace,
                                                                          params=params)
-
+    async def _fetch_buffer_memory(self, user_input: str, namespace: str = None):
    async def _add_buffer_memory(self, user_input: str, namespace: str = None ):
        return await self.short_term_memory.episodic_buffer._add_memories(observation=user_input, namespace=namespace)
    async def _fetch_buffer_memory(self, user_input: str, namespace: str = None ):
        return await self.short_term_memory.episodic_buffer._fetch_memories(observation=user_input, namespace=namespace)
-    async def _delete_buffer_memory(self, params:str=None):
+    async def _delete_buffer_memory(self, params: str = None):
        return await self.long_term_memory.episodic_buffer._delete_memories(
-             params=params
+            params=params
        )
 async def main():
    memory = Memory(user_id="123")
    await memory.async_init()
    params = {
        "version": "1.0",
        "agreement_id": "AG123456",
        "privacy_policy": "https://example.com/privacy",
        "terms_of_service": "https://example.com/terms",
        "format": "json",
        "schema_version": "1.1",
        "checksum": "a1b2c3d4e5f6",
        "owner": "John Doe",
        "license": "MIT",
        "validity_start": "2023-08-01",
        "validity_end": "2024-07-31"
    }
-    # gg = await memory._run_buffer(user_input= "bla", content = "blablabla ")
+    gg = await memory._run_buffer(user_input="i NEED TRANSLATION TO GERMAN ", content="i NEED TRANSLATION TO GERMAN ")
    # print(gg)
    gg = await memory._delete_episodic_memory()
    print(gg)
-    # ggur = await memory._add_episodic_memory(observation = "bla bla bla")
+    # gg = await memory._delete_episodic_memory()
    # print(gg)
    # ggur = await memory._add_episodic_memory(observation = "bla bla bla", params=params)
    # print(ggur)
    # ggur = await memory._fetch_episodic_memory(observation = "bla bla bla")
    # print(ggur)
    # fff = await memory._fetch_memories_buffer(user_input = "bla bla bla", namespace="Test")
    # print(fff)
 if __name__ == "__main__":
    import asyncio
    asyncio.run(main())
    # bb = agent._update_semantic_memory(semantic_memory="Users core summary")