Merge pull request #4 from topoteretes/code_review

Code review
Authored by Vasilije on 2023-08-25 12:12:52 +02:00; committed via GitHub.
commit 1050647a5f
7 changed files with 935 additions and 705 deletions

.gitignore (vendored)

@@ -1,3 +1,6 @@
.env
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -157,4 +160,5 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


@@ -24,21 +24,43 @@ Initial code lets you do three operations:
## Usage
The FastAPI endpoints accept prompts and store data with the help of the Memory Manager.
The available memory types are: Episodic, Semantic, and Buffer.
### Endpoint Overview
The Memory API provides the following endpoints:
- /[memory_type]/add-memory (POST)
- /[memory_type]/fetch-memory (POST)
- /[memory_type]/delete-memory (POST)
- /available-buffer-actions (GET)
- /run-buffer (POST)
- /buffer/create-context (POST)
Here is a payload example:
```json
{
    "payload": {
        "user_id": "681",
        "session_id": "471",
        "model_speed": "slow",
        "prompt": "I want ",
        "pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
        "params": {
            "version": "1.0",
            "agreement_id": "AG123456",
            "privacy_policy": "https://example.com/privacy",
            "terms_of_service": "https://example.com/terms",
            "format": "json",
            "schema_version": "1.1",
            "checksum": "a1b2c3d4e5f6",
            "owner": "John Doe",
            "license": "MIT",
            "validity_start": "2023-08-01",
            "validity_end": "2024-07-31"
        }
    }
}
```
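
For illustration, calling these endpoints from Python might look like the following minimal sketch. The base URL, the use of the `requests` library, and the trimmed-down `params` object are assumptions for the example, not part of this commit:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local deployment

payload = {
    "payload": {
        "user_id": "681",
        "session_id": "471",
        "model_speed": "slow",
        "prompt": "I want ",
        "pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
        "params": {"version": "1.0", "format": "json"},
    }
}

# [memory_type] is one of: episodic, semantic, buffer
r = requests.post(f"{BASE_URL}/episodic/add-memory", json=payload)
print(r.status_code, r.json())

# Run the buffer against the stored context
r = requests.post(f"{BASE_URL}/run-buffer", json=payload)
print(r.status_code, r.json())
```

The same payload shape is accepted by the fetch/delete routes and by `/buffer/create-context`.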

View file

@@ -1,3 +1,5 @@
from io import BytesIO
from langchain.document_loaders import PyPDFLoader
from level_2_pdf_vectorstore__dlt_contracts import Memory
@@ -27,7 +29,7 @@ from dotenv import load_dotenv
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
app = FastAPI(debug=True)
@@ -63,82 +65,73 @@ def health_check():
#curl -X POST -H "Content-Type: application/json" -d '{"data": "YourPayload"}' -F "files=@/path/to/your/pdf/file.pdf" http://127.0.0.1:8000/upload/
from fastapi import FastAPI, UploadFile, File
import requests
import os
import json
app = FastAPI()
from io import BytesIO
class Payload(BaseModel):
    payload: Dict[str, Any]


@app.post("/upload/", response_model=dict)
async def upload_pdf_and_payload(
    payload: Payload,
    # files: List[UploadFile] = File(...),
):
    try:
        # Process the payload
        decoded_payload = payload.payload

        # Download the remote PDF if a URL is provided
        if 'pdf_url' in decoded_payload:
            pdf_response = requests.get(decoded_payload['pdf_url'])
            pdf_content = pdf_response.content
            logging.info("Downloaded PDF from URL")

            # Create an in-memory file-like object for the PDF content
            pdf_stream = BytesIO(pdf_content)
            contents = pdf_stream.read()

            tmp_location = os.path.join('/tmp', "tmp.pdf")
            with open(tmp_location, 'wb') as tmp_file:
                tmp_file.write(contents)
            logging.info("Wrote PDF from URL")

            # Process the PDF using PyPDFLoader
            loader = PyPDFLoader(tmp_location)
            pages = loader.load_and_split()
            logging.info("PDF split into pages")

            Memory_ = Memory(index_name="my-agent", user_id='555')
            await Memory_.async_init()
            await Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content=pages)

            # Run the buffer
            response = await Memory_._run_buffer(user_input="I want to get a schema for my data")
            return JSONResponse(content={"response": response}, status_code=200)
            # TODO: add the user id to the payload
            # TODO: add the raw pdf to the payload
    except Exception as e:
        return {"error": str(e)}

def memory_factory(memory_type):
    load_dotenv()

    class Payload(BaseModel):
        payload: Dict[str, Any]

    @app.post(f"/{memory_type}/add-memory", response_model=dict)
@@ -148,23 +141,47 @@ def memory_factory(memory_type):
    ):
        try:
            logging.info("Init PDF processing")
            decoded_payload = payload.payload

            if 'pdf_url' in decoded_payload:
                pdf_response = requests.get(decoded_payload['pdf_url'])
                pdf_content = pdf_response.content
                logging.info("Downloaded PDF from URL")

                # Create an in-memory file-like object for the PDF content
                pdf_stream = BytesIO(pdf_content)
                contents = pdf_stream.read()

                tmp_location = os.path.join('/tmp', "tmp.pdf")
                with open(tmp_location, 'wb') as tmp_file:
                    tmp_file.write(contents)
                logging.info("Wrote PDF from URL")

                # Process the PDF using PyPDFLoader
                loader = PyPDFLoader(tmp_location)
                # pages = loader.load_and_split()
                logging.info("PDF split into pages")

            Memory_ = Memory(user_id=decoded_payload['user_id'])
            await Memory_.async_init()
            memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None)
            # NOTE: assumes 'pdf_url' was present, otherwise 'loader' is undefined
            output = await memory_class(observation=str(loader), params=decoded_payload['params'])
            return JSONResponse(content={"response": output}, status_code=200)
        except Exception as e:
            return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
@app.post("/{memory_type}/fetch-memory", response_model=dict)
async def add_memory(
async def fetch_memory(
payload: Payload,
# files: List[UploadFile] = File(...),
):
@@ -172,7 +189,7 @@ def memory_factory(memory_type):
            decoded_payload = payload.payload
            Memory_ = Memory(user_id=decoded_payload['user_id'])
            await Memory_.async_init()
@@ -185,7 +202,7 @@ def memory_factory(memory_type):
            return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)

    @app.post(f"/{memory_type}/delete-memory", response_model=dict)
    async def delete_memory(
        payload: Payload,
        # files: List[UploadFile] = File(...),
    ):
@@ -193,7 +210,7 @@ def memory_factory(memory_type):
            decoded_payload = payload.payload
            Memory_ = Memory(user_id=decoded_payload['user_id'])
            await Memory_.async_init()
@@ -210,6 +227,71 @@ for memory_type in memory_list:
    memory_factory(memory_type)
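
`memory_factory` registers one set of add/fetch/delete routes per memory type, each call closing over a concrete `memory_type`. A standalone sketch of this route-factory pattern (the app and route names below are illustrative, not from this repo) shows why the decorator paths need to be f-strings:

```python
from fastapi import FastAPI

demo_app = FastAPI()


def route_factory(name: str):
    # The f-string bakes the concrete name into the path at registration
    # time, so the loop below registers /episodic/ping, /semantic/ping
    # and /buffer/ping as three separate routes. A literal "/{name}/ping"
    # would instead declare a single route with a path parameter.
    @demo_app.post(f"/{name}/ping")
    async def ping():
        return {"memory_type": name}


for name in ["episodic", "semantic", "buffer"]:
    route_factory(name)
```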
@app.get("/available-buffer-actions", response_model=dict)
async def available_buffer_actions(
payload: Payload,
# files: List[UploadFile] = File(...),
):
try:
decoded_payload = payload.payload
Memory_ = Memory(user_id=decoded_payload['user_id'])
await Memory_.async_init()
# memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None)
output = await Memory_._available_operations()
return JSONResponse(content={"response": output}, status_code=200)
except Exception as e:
return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
@app.post("/run-buffer", response_model=dict)
async def available_buffer_actions(
payload: Payload,
# files: List[UploadFile] = File(...),
):
try:
decoded_payload = payload.payload
Memory_ = Memory(user_id=decoded_payload['user_id'])
await Memory_.async_init()
# memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None)
output = await Memory_._run_buffer(user_input=decoded_payload['prompt'], params=decoded_payload['params'])
return JSONResponse(content={"response": output}, status_code=200)
except Exception as e:
return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
@app.post("/buffer/create-context", response_model=dict)
async def available_buffer_actions(
payload: Payload,
# files: List[UploadFile] = File(...),
):
try:
decoded_payload = payload.payload
Memory_ = Memory(user_id=decoded_payload['user_id'])
await Memory_.async_init()
# memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None)
output = await Memory_._create_buffer_context(user_input=decoded_payload['prompt'], params=decoded_payload['params'])
return JSONResponse(content={"response": output}, status_code=200)
except Exception as e:
return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)

File diff suppressed because it is too large.

level_2/poetry.lock (generated)

@@ -261,17 +261,17 @@ numpy = {version = ">=1.19.0", markers = "python_version >= \"3.9\""}
[[package]]
name = "boto3"
version = "1.28.30"
version = "1.28.32"
description = "The AWS SDK for Python"
optional = false
python-versions = ">= 3.7"
files = [
{file = "boto3-1.28.30-py3-none-any.whl", hash = "sha256:e095ede98d3680e65966ab71f273b7d86938f5d853773ef96f4cb646277c2a4b"},
{file = "boto3-1.28.30.tar.gz", hash = "sha256:2b509a959966a572f15db5768a18066ce1f53022ac53fca9421c620219fa3998"},
{file = "boto3-1.28.32-py3-none-any.whl", hash = "sha256:ed787f250ce2562c7744395bdf32b5a7bc9184126ef50a75e97bcb66043dccf3"},
{file = "boto3-1.28.32.tar.gz", hash = "sha256:b505faa126db84e226f6f8d242a798fae30a725f0cac8a76c6aca9ace4e8eb28"},
]
[package.dependencies]
botocore = ">=1.31.30,<1.32.0"
botocore = ">=1.31.32,<1.32.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.6.0,<0.7.0"
@@ -280,13 +280,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
version = "1.31.30"
version = "1.31.32"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">= 3.7"
files = [
{file = "botocore-1.31.30-py3-none-any.whl", hash = "sha256:269f20dcadd8dfd0c26d0e6fbceb84814ff6638ff3aafcc5324b9fb9949a7051"},
{file = "botocore-1.31.30.tar.gz", hash = "sha256:3cf6a9d7621b897c9ff23cd02113826141b3dd3d7e90273b661efc4dc05f84e2"},
{file = "botocore-1.31.32-py3-none-any.whl", hash = "sha256:8992ac186988c4b4cc168e8e479e9472da1442b193c1bf7c9dcd1877ec62d23c"},
{file = "botocore-1.31.32.tar.gz", hash = "sha256:7a07d8dc8cc47bf23af39409ada81f388eb78233e1bb2cde0c415756da753664"},
]
[package.dependencies]
@@ -1053,13 +1053,13 @@ requests = ">=2.20.0,<3.0"
[[package]]
name = "gptcache"
version = "0.1.39.1"
version = "0.1.40"
description = "GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat applications that rely on the LLM service. GPTCache works as a memcache for AIGC applications, similar to how Redis works for traditional applications."
optional = false
python-versions = ">=3.8.1"
files = [
{file = "gptcache-0.1.39.1-py3-none-any.whl", hash = "sha256:81355f7878e12a820dccb017f8a45ea44b73178dac07108c56db664a476a4a07"},
{file = "gptcache-0.1.39.1.tar.gz", hash = "sha256:a9c629fdeaa94b78a6cfe707a5f9a3a52b361655a3f01327709ca00c78a500eb"},
{file = "gptcache-0.1.40-py3-none-any.whl", hash = "sha256:ba323e5e46b100fa7663b5f4d164cc2aee60f343184ed03ec2d2bb95e9f47c50"},
{file = "gptcache-0.1.40.tar.gz", hash = "sha256:5fe4bcf3a45946177cb845b3e1ec01159f10622600e1384b9de0c7c6065d10d5"},
]
[package.dependencies]
@@ -1362,39 +1362,38 @@ files = [
[[package]]
name = "langchain"
version = "0.0.250"
version = "0.0.271"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain-0.0.250-py3-none-any.whl", hash = "sha256:65b3520f507e848edd88a35a70700971bbbf822fda65f621ccf44a3bb36ad03a"},
{file = "langchain-0.0.250.tar.gz", hash = "sha256:1b5775d6a472f633bb06e794f58cb6ff5d1eeb2da603b64a6a15013f8f61ee3f"},
{file = "langchain-0.0.271-py3-none-any.whl", hash = "sha256:3ca68c9cf04edb42ce9225adc65ee739e5e00ed55d08aeb06a47391f3c59018c"},
{file = "langchain-0.0.271.tar.gz", hash = "sha256:f79d19405b755608216d1850de4a945a2bceb35c5ca8e4f7a4f9e29a366b097e"},
]
[package.dependencies]
aiohttp = ">=3.8.3,<4.0.0"
async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
dataclasses-json = ">=0.5.7,<0.6.0"
langsmith = ">=0.0.11,<0.1.0"
langsmith = ">=0.0.21,<0.1.0"
numexpr = ">=2.8.4,<3.0.0"
numpy = ">=1,<2"
openapi-schema-pydantic = ">=1.2,<2.0"
pydantic = ">=1,<2"
PyYAML = ">=5.4.1"
pydantic = ">=1,<3"
PyYAML = ">=5.3"
requests = ">=2,<3"
SQLAlchemy = ">=1.4,<3"
tenacity = ">=8.1.0,<9.0.0"
[package.extras]
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "anthropic (>=0.3,<0.4)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.11.0,<0.12.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)", "xinference (>=0.0.6,<0.0.7)"]
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b6)", "openai (>=0,<1)"]
clarifai = ["clarifai (>=9.1.0)"]
cohere = ["cohere (>=4,<5)"]
docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
embeddings = ["sentence-transformers (>=2,<3)"]
extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xinference (>=0.0.6,<0.0.7)", "zep-python (>=0.32)"]
extended-testing = ["amazon-textract-caller (<2)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
javascript = ["esprima (>=4.0.1,<5.0.0)"]
llms = ["anthropic (>=0.3,<0.4)", "clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.19)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)", "xinference (>=0.0.6,<0.0.7)"]
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"]
qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"]
text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
@@ -1415,13 +1414,13 @@ data = ["language-data (>=1.1,<2.0)"]
[[package]]
name = "langsmith"
version = "0.0.25"
version = "0.0.26"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langsmith-0.0.25-py3-none-any.whl", hash = "sha256:d595435ad21fa6077550d7c85472935d1e8241afa042c1e29287d2c95c3ed151"},
{file = "langsmith-0.0.25.tar.gz", hash = "sha256:e728c398fc1adaa0ed8abeb21f6a92d7fb19fe3ab49d3911c22b03dfe25935d6"},
{file = "langsmith-0.0.26-py3-none-any.whl", hash = "sha256:61c1d4582104d96edde04e1eea1dae347645b691c44489a5871341a2a1a2a1eb"},
{file = "langsmith-0.0.26.tar.gz", hash = "sha256:80a4ef1b663a24a460d25b9986ab2010c5d06b6061c65be473abafc0647d191a"},
]
[package.dependencies]
@@ -1875,20 +1874,6 @@ dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-moc
embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"]
wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"]
-[[package]]
-name = "openapi-schema-pydantic"
-version = "1.2.4"
-description = "OpenAPI (v3) specification schema as pydantic class"
-optional = false
-python-versions = ">=3.6.1"
-files = [
-    {file = "openapi-schema-pydantic-1.2.4.tar.gz", hash = "sha256:3e22cf58b74a69f752cc7e5f1537f6e44164282db2700cbbcd3bb99ddd065196"},
-    {file = "openapi_schema_pydantic-1.2.4-py3-none-any.whl", hash = "sha256:a932ecc5dcbb308950282088956e94dea069c9823c84e507d64f6b622222098c"},
-]
-[package.dependencies]
-pydantic = ">=1.8.2"
[[package]]
name = "orjson"
version = "3.9.5"
@@ -3561,13 +3546,13 @@ colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\" and python
[[package]]
name = "weaviate-client"
version = "3.22.1"
version = "3.23.0"
description = "A python native Weaviate client"
optional = false
python-versions = ">=3.8"
files = [
{file = "weaviate-client-3.22.1.tar.gz", hash = "sha256:aff61bd3f5d74df20a62328443e3aa9c860d5330fdfb19c4d8ddc44cb604032f"},
{file = "weaviate_client-3.22.1-py3-none-any.whl", hash = "sha256:01843a4899a227300e570409e77628e9d1b28476313f94943c37aee3f75112e1"},
{file = "weaviate-client-3.23.0.tar.gz", hash = "sha256:3ffd7f1460c9e32755d84d4f5fc63dfc0bd990dbe2c3dc20d5c68119d467680e"},
{file = "weaviate_client-3.23.0-py3-none-any.whl", hash = "sha256:3d3bb75c1d96b2b71e213c5eb885ae3e3f42e4304955383c467d100187d9ff8e"},
]
[package.dependencies]
@@ -3581,13 +3566,13 @@ grpc = ["grpcio", "grpcio-tools"]
[[package]]
name = "wheel"
version = "0.41.1"
version = "0.41.2"
description = "A built-package format for Python"
optional = false
python-versions = ">=3.7"
files = [
{file = "wheel-0.41.1-py3-none-any.whl", hash = "sha256:473219bd4cbedc62cea0cb309089b593e47c15c4a2531015f94e4e3b9a0f6981"},
{file = "wheel-0.41.1.tar.gz", hash = "sha256:12b911f083e876e10c595779709f8a88a59f45aacc646492a67fe9ef796c1b47"},
{file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"},
{file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"},
]
[package.extras]
@@ -3795,4 +3780,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "13258c777467d93ab73021225322da670e42513cedec6252a40aacf74822ea68"
content-hash = "5629225437c5aec01f9f862d46d6d1e68abde4c42a0c1ad709df875883171991"


@@ -8,7 +8,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
#langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"}
langchain = "v0.0.250"
langchain = "v0.0.271"
nltk = "3.8.1"
openai = "0.27.8"
@@ -39,6 +39,7 @@ dlt = { version ="^0.3.8", extras = ["duckdb"]}
weaviate-client = "^3.22.1"
python-multipart = "^0.0.6"
deep-translator = "^1.11.4"
+humanize = "^4.8.0"


@@ -0,0 +1,76 @@
import unittest
import asyncio
import sys

sys.path.append("..")  # Adds the parent directory to the Python module search path.

from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory


class TestMemory(unittest.TestCase):
    def setUp(self):
        self.loop = asyncio.get_event_loop()
        self.memory = Memory(user_id="123")
        self.loop.run_until_complete(self.memory.async_init())

    def test_add_fetch_delete_semantic_memory(self):
        async def semantic_workflow():
            params = {"sample_param": "value"}
            sample_memory = "sample semantic memory"
            # Add
            await self.memory._add_semantic_memory(sample_memory, params=params)
            # Fetch
            fetched = await self.memory._fetch_semantic_memory(sample_memory, params)
            # NOTE: key looks copied from the episodic test; adjust if the
            # semantic store is returned under a different class name.
            fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text']
            self.assertIn(sample_memory, fetched_text)  # Replace this with the appropriate validation
            # Delete
            await self.memory._delete_semantic_memory()
            # Verify deletion
            after_delete = await self.memory._fetch_semantic_memory(sample_memory, params)
            self.assertNotIn(sample_memory, after_delete)  # Replace with the appropriate validation

        self.loop.run_until_complete(semantic_workflow())

    def test_add_fetch_delete_episodic_memory(self):
        async def episodic_workflow():
            params = {"sample_param": "value"}
            sample_memory = """{
                "sample_key": "sample_value"
            }"""
            # Add
            await self.memory._add_episodic_memory(observation=sample_memory, params=params)
            # Fetch
            fetched = await self.memory._fetch_episodic_memory(sample_memory)
            fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text']
            self.assertIn(sample_memory, fetched_text)  # Replace this with the appropriate validation
            # Delete
            await self.memory._delete_episodic_memory()
            # Verify deletion
            after_delete = await self.memory._fetch_episodic_memory(sample_memory)
            self.assertNotIn(sample_memory, after_delete)  # Replace with the appropriate validation

        self.loop.run_until_complete(episodic_workflow())
# def test_add_fetch_delete_buffer_memory(self):
# async def buffer_workflow():
# params = {"sample_param": "value"}
# user_input = "sample buffer input"
# namespace = "sample_namespace"
#
# # Add
# await self.memory._add_buffer_memory(user_input=user_input, namespace=namespace, params=params)
# # Fetch
# fetched = await self.memory._fetch_buffer_memory(user_input, namespace)
# self.assertIn(user_input, fetched) # Replace this with the appropriate validation
# # Delete
# await self.memory._delete_buffer_memory()
# # Verify Deletion
# after_delete = await self.memory._fetch_buffer_memory(user_input, namespace)
# self.assertNotIn(user_input, after_delete) # Replace with the appropriate validation
#
# self.loop.run_until_complete(buffer_workflow())
if __name__ == '__main__':
    unittest.main()
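
As an aside, on Python 3.8+ the same roundtrip can be written without manual event-loop management by using `unittest.IsolatedAsyncioTestCase`. A minimal sketch, assuming the same `Memory` API as the tests above:

```python
import unittest

from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory


class TestMemoryAsync(unittest.IsolatedAsyncioTestCase):
    """Semantic-memory roundtrip without manual event-loop handling."""

    async def asyncSetUp(self):
        self.memory = Memory(user_id="123")
        await self.memory.async_init()

    async def test_semantic_roundtrip(self):
        params = {"sample_param": "value"}
        sample_memory = "sample semantic memory"
        await self.memory._add_semantic_memory(sample_memory, params=params)
        fetched = await self.memory._fetch_semantic_memory(sample_memory, params)
        self.assertTrue(fetched)  # replace with store-specific validation
        await self.memory._delete_semantic_memory()


if __name__ == "__main__":
    unittest.main()
```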