Merge pull request #4 from topoteretes/code_review

Code review
Authored by Vasilije on 2023-08-25 12:12:52 +02:00; committed via GitHub.
commit 1050647a5f
7 changed files with 935 additions and 705 deletions

.gitignore (vendored)

@@ -1,3 +1,6 @@
.env
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -157,4 +160,5 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


@@ -24,21 +24,43 @@ Initial code lets you do three operations:
## Usage
The FastAPI endpoints accept prompts and store data with the help of the Memory Manager.
The available memory types are: Episodic, Semantic, and Buffer.
### Endpoint Overview
The Memory API provides the following endpoints:
- /[memory_type]/add-memory (POST)
- /[memory_type]/fetch-memory (POST)
- /[memory_type]/delete-memory (POST)
- /available-buffer-actions (GET)
- /run-buffer (POST)
- /buffer/create-context (POST)
Here is a payload example:
```json
{
    "payload": {
        "user_id": "681",
        "session_id": "471",
        "model_speed": "slow",
        "prompt": "I want ",
        "pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
        "params": {
            "version": "1.0",
            "agreement_id": "AG123456",
            "privacy_policy": "https://example.com/privacy",
            "terms_of_service": "https://example.com/terms",
            "format": "json",
            "schema_version": "1.1",
            "checksum": "a1b2c3d4e5f6",
            "owner": "John Doe",
            "license": "MIT",
            "validity_start": "2023-08-01",
            "validity_end": "2024-07-31"
        }
    }
}
```
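
For illustration, calling these endpoints from Python might look like the following minimal sketch. The base URL, the use of the `requests` library, and the trimmed-down `params` object are assumptions for the example, not part of this commit:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local deployment

payload = {
    "payload": {
        "user_id": "681",
        "session_id": "471",
        "model_speed": "slow",
        "prompt": "I want ",
        "pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
        "params": {"version": "1.0", "format": "json"},
    }
}

# [memory_type] is one of: episodic, semantic, buffer
r = requests.post(f"{BASE_URL}/episodic/add-memory", json=payload)
print(r.status_code, r.json())

# Run the buffer against the stored context
r = requests.post(f"{BASE_URL}/run-buffer", json=payload)
print(r.status_code, r.json())
```

The same payload shape is accepted by the fetch/delete routes and by `/buffer/create-context`.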

View file

@@ -1,3 +1,5 @@
from io import BytesIO
from langchain.document_loaders import PyPDFLoader
from level_2_pdf_vectorstore__dlt_contracts import Memory
@@ -27,7 +29,7 @@ from dotenv import load_dotenv
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
app = FastAPI(debug=True)
@@ -63,82 +65,73 @@ def health_check():
#curl -X POST -H "Content-Type: application/json" -d '{"data": "YourPayload"}' -F "files=@/path/to/your/pdf/file.pdf" http://127.0.0.1:8000/upload/
from fastapi import FastAPI, UploadFile, File
import requests
import os
import json
app = FastAPI()
from io import BytesIO
class Payload(BaseModel):
    payload: Dict[str, Any]


@app.post("/upload/", response_model=dict)
async def upload_pdf_and_payload(
    payload: Payload,
    # files: List[UploadFile] = File(...),
):
    try:
        # Process the payload
        decoded_payload = payload.payload

        # Download the remote PDF if a URL is provided
        if 'pdf_url' in decoded_payload:
            pdf_response = requests.get(decoded_payload['pdf_url'])
            pdf_content = pdf_response.content
            logging.info("Downloaded PDF from URL")

            # Create an in-memory file-like object for the PDF content
            pdf_stream = BytesIO(pdf_content)
            contents = pdf_stream.read()

            tmp_location = os.path.join('/tmp', "tmp.pdf")
            with open(tmp_location, 'wb') as tmp_file:
                tmp_file.write(contents)
            logging.info("Wrote PDF from URL")

            # Process the PDF using PyPDFLoader
            loader = PyPDFLoader(tmp_location)
            pages = loader.load_and_split()
            logging.info("PDF split into pages")

            Memory_ = Memory(index_name="my-agent", user_id='555')
            await Memory_.async_init()
            await Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content=pages)

            # Run the buffer
            response = await Memory_._run_buffer(user_input="I want to get a schema for my data")
            return JSONResponse(content={"response": response}, status_code=200)
            # TODO: add the user id to the payload
            # TODO: add the raw pdf to the payload
    except Exception as e:
        return {"error": str(e)}

def memory_factory(memory_type):
    load_dotenv()

    class Payload(BaseModel):
        payload: Dict[str, Any]

    @app.post(f"/{memory_type}/add-memory", response_model=dict)
@@ -148,23 +141,47 @@ def memory_factory(memory_type):
    ):
        try:
            logging.info("Init PDF processing")
            decoded_payload = payload.payload

            if 'pdf_url' in decoded_payload:
                pdf_response = requests.get(decoded_payload['pdf_url'])
                pdf_content = pdf_response.content
                logging.info("Downloaded PDF from URL")

                # Create an in-memory file-like object for the PDF content
                pdf_stream = BytesIO(pdf_content)
                contents = pdf_stream.read()

                tmp_location = os.path.join('/tmp', "tmp.pdf")
                with open(tmp_location, 'wb') as tmp_file:
                    tmp_file.write(contents)
                logging.info("Wrote PDF from URL")

                # Process the PDF using PyPDFLoader
                loader = PyPDFLoader(tmp_location)
                # pages = loader.load_and_split()
                logging.info("PDF split into pages")

            Memory_ = Memory(user_id=decoded_payload['user_id'])
            await Memory_.async_init()
            memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None)
            # NOTE: assumes 'pdf_url' was present, otherwise 'loader' is undefined
            output = await memory_class(observation=str(loader), params=decoded_payload['params'])
            return JSONResponse(content={"response": output}, status_code=200)
        except Exception as e:
            return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
@app.post("/{memory_type}/fetch-memory", response_model=dict)
async def add_memory(
async def fetch_memory(
payload: Payload,
# files: List[UploadFile] = File(...),
):
@@ -172,7 +189,7 @@ def memory_factory(memory_type):
            decoded_payload = payload.payload
            Memory_ = Memory(user_id=decoded_payload['user_id'])
            await Memory_.async_init()
@@ -185,7 +202,7 @@ def memory_factory(memory_type):
            return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)

    @app.post(f"/{memory_type}/delete-memory", response_model=dict)
    async def delete_memory(
        payload: Payload,
        # files: List[UploadFile] = File(...),
    ):
@@ -193,7 +210,7 @@ def memory_factory(memory_type):
            decoded_payload = payload.payload
            Memory_ = Memory(user_id=decoded_payload['user_id'])
            await Memory_.async_init()
@@ -210,6 +227,71 @@ for memory_type in memory_list:
    memory_factory(memory_type)
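
`memory_factory` registers one set of add/fetch/delete routes per memory type, each call closing over a concrete `memory_type`. A standalone sketch of this route-factory pattern (the app and route names below are illustrative, not from this repo) shows why the decorator paths need to be f-strings:

```python
from fastapi import FastAPI

demo_app = FastAPI()


def route_factory(name: str):
    # The f-string bakes the concrete name into the path at registration
    # time, so the loop below registers /episodic/ping, /semantic/ping
    # and /buffer/ping as three separate routes. A literal "/{name}/ping"
    # would instead declare a single route with a path parameter.
    @demo_app.post(f"/{name}/ping")
    async def ping():
        return {"memory_type": name}


for name in ["episodic", "semantic", "buffer"]:
    route_factory(name)
```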
@app.get("/available-buffer-actions", response_model=dict)
async def available_buffer_actions(
payload: Payload,
# files: List[UploadFile] = File(...),
):
try:
decoded_payload = payload.payload
Memory_ = Memory(user_id=decoded_payload['user_id'])
await Memory_.async_init()
# memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None)
output = await Memory_._available_operations()
return JSONResponse(content={"response": output}, status_code=200)
except Exception as e:
return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
@app.post("/run-buffer", response_model=dict)
async def available_buffer_actions(
payload: Payload,
# files: List[UploadFile] = File(...),
):
try:
decoded_payload = payload.payload
Memory_ = Memory(user_id=decoded_payload['user_id'])
await Memory_.async_init()
# memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None)
output = await Memory_._run_buffer(user_input=decoded_payload['prompt'], params=decoded_payload['params'])
return JSONResponse(content={"response": output}, status_code=200)
except Exception as e:
return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
@app.post("/buffer/create-context", response_model=dict)
async def available_buffer_actions(
payload: Payload,
# files: List[UploadFile] = File(...),
):
try:
decoded_payload = payload.payload
Memory_ = Memory(user_id=decoded_payload['user_id'])
await Memory_.async_init()
# memory_class = getattr(Memory_, f"_delete_{memory_type}_memory", None)
output = await Memory_._create_buffer_context(user_input=decoded_payload['prompt'], params=decoded_payload['params'])
return JSONResponse(content={"response": output}, status_code=200)
except Exception as e:
return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)

File diff suppressed because it is too large.

level_2/poetry.lock (generated)

@@ -261,17 +261,17 @@ numpy = {version = ">=1.19.0", markers = "python_version >= \"3.9\""}
[[package]]
name = "boto3"
version = "1.28.30"
version = "1.28.32"
description = "The AWS SDK for Python"
optional = false
python-versions = ">= 3.7"
files = [
{file = "boto3-1.28.30-py3-none-any.whl", hash = "sha256:e095ede98d3680e65966ab71f273b7d86938f5d853773ef96f4cb646277c2a4b"},
{file = "boto3-1.28.30.tar.gz", hash = "sha256:2b509a959966a572f15db5768a18066ce1f53022ac53fca9421c620219fa3998"},
{file = "boto3-1.28.32-py3-none-any.whl", hash = "sha256:ed787f250ce2562c7744395bdf32b5a7bc9184126ef50a75e97bcb66043dccf3"},
{file = "boto3-1.28.32.tar.gz", hash = "sha256:b505faa126db84e226f6f8d242a798fae30a725f0cac8a76c6aca9ace4e8eb28"},
]
[package.dependencies]
botocore = ">=1.31.30,<1.32.0"
botocore = ">=1.31.32,<1.32.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.6.0,<0.7.0"
@@ -280,13 +280,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
version = "1.31.30"
version = "1.31.32"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">= 3.7"
files = [
{file = "botocore-1.31.30-py3-none-any.whl", hash = "sha256:269f20dcadd8dfd0c26d0e6fbceb84814ff6638ff3aafcc5324b9fb9949a7051"},
{file = "botocore-1.31.30.tar.gz", hash = "sha256:3cf6a9d7621b897c9ff23cd02113826141b3dd3d7e90273b661efc4dc05f84e2"},
{file = "botocore-1.31.32-py3-none-any.whl", hash = "sha256:8992ac186988c4b4cc168e8e479e9472da1442b193c1bf7c9dcd1877ec62d23c"},
{file = "botocore-1.31.32.tar.gz", hash = "sha256:7a07d8dc8cc47bf23af39409ada81f388eb78233e1bb2cde0c415756da753664"},
]
[package.dependencies]
@@ -1053,13 +1053,13 @@ requests = ">=2.20.0,<3.0"
[[package]]
name = "gptcache"
version = "0.1.39.1"
version = "0.1.40"
description = "GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat applications that rely on the LLM service. GPTCache works as a memcache for AIGC applications, similar to how Redis works for traditional applications."
optional = false
python-versions = ">=3.8.1"
files = [
{file = "gptcache-0.1.39.1-py3-none-any.whl", hash = "sha256:81355f7878e12a820dccb017f8a45ea44b73178dac07108c56db664a476a4a07"},
{file = "gptcache-0.1.39.1.tar.gz", hash = "sha256:a9c629fdeaa94b78a6cfe707a5f9a3a52b361655a3f01327709ca00c78a500eb"},
{file = "gptcache-0.1.40-py3-none-any.whl", hash = "sha256:ba323e5e46b100fa7663b5f4d164cc2aee60f343184ed03ec2d2bb95e9f47c50"},
{file = "gptcache-0.1.40.tar.gz", hash = "sha256:5fe4bcf3a45946177cb845b3e1ec01159f10622600e1384b9de0c7c6065d10d5"},
]
[package.dependencies]
@@ -1362,39 +1362,38 @@ files = [
[[package]]
name = "langchain"
version = "0.0.250"
version = "0.0.271"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain-0.0.250-py3-none-any.whl", hash = "sha256:65b3520f507e848edd88a35a70700971bbbf822fda65f621ccf44a3bb36ad03a"},
{file = "langchain-0.0.250.tar.gz", hash = "sha256:1b5775d6a472f633bb06e794f58cb6ff5d1eeb2da603b64a6a15013f8f61ee3f"},
{file = "langchain-0.0.271-py3-none-any.whl", hash = "sha256:3ca68c9cf04edb42ce9225adc65ee739e5e00ed55d08aeb06a47391f3c59018c"},
{file = "langchain-0.0.271.tar.gz", hash = "sha256:f79d19405b755608216d1850de4a945a2bceb35c5ca8e4f7a4f9e29a366b097e"},
]
[package.dependencies]
aiohttp = ">=3.8.3,<4.0.0"
async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
dataclasses-json = ">=0.5.7,<0.6.0"
langsmith = ">=0.0.11,<0.1.0"
langsmith = ">=0.0.21,<0.1.0"
numexpr = ">=2.8.4,<3.0.0"
numpy = ">=1,<2"
openapi-schema-pydantic = ">=1.2,<2.0"
pydantic = ">=1,<2"
PyYAML = ">=5.4.1"
pydantic = ">=1,<3"
PyYAML = ">=5.3"
requests = ">=2,<3"
SQLAlchemy = ">=1.4,<3"
tenacity = ">=8.1.0,<9.0.0"
[package.extras]
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "anthropic (>=0.3,<0.4)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.11.0,<0.12.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)", "xinference (>=0.0.6,<0.0.7)"]
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b6)", "openai (>=0,<1)"]
clarifai = ["clarifai (>=9.1.0)"]
cohere = ["cohere (>=4,<5)"]
docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
embeddings = ["sentence-transformers (>=2,<3)"]
extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xinference (>=0.0.6,<0.0.7)", "zep-python (>=0.32)"]
extended-testing = ["amazon-textract-caller (<2)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
javascript = ["esprima (>=4.0.1,<5.0.0)"]
llms = ["anthropic (>=0.3,<0.4)", "clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.19)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)", "xinference (>=0.0.6,<0.0.7)"]
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"]
qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"]
text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
@@ -1415,13 +1414,13 @@ data = ["language-data (>=1.1,<2.0)"]
[[package]]
name = "langsmith"
version = "0.0.25"
version = "0.0.26"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langsmith-0.0.25-py3-none-any.whl", hash = "sha256:d595435ad21fa6077550d7c85472935d1e8241afa042c1e29287d2c95c3ed151"},
{file = "langsmith-0.0.25.tar.gz", hash = "sha256:e728c398fc1adaa0ed8abeb21f6a92d7fb19fe3ab49d3911c22b03dfe25935d6"},
{file = "langsmith-0.0.26-py3-none-any.whl", hash = "sha256:61c1d4582104d96edde04e1eea1dae347645b691c44489a5871341a2a1a2a1eb"},
{file = "langsmith-0.0.26.tar.gz", hash = "sha256:80a4ef1b663a24a460d25b9986ab2010c5d06b6061c65be473abafc0647d191a"},
]
[package.dependencies]
@@ -1875,20 +1874,6 @@ dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-moc
embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"]
wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"]
-[[package]]
-name = "openapi-schema-pydantic"
-version = "1.2.4"
-description = "OpenAPI (v3) specification schema as pydantic class"
-optional = false
-python-versions = ">=3.6.1"
-files = [
-    {file = "openapi-schema-pydantic-1.2.4.tar.gz", hash = "sha256:3e22cf58b74a69f752cc7e5f1537f6e44164282db2700cbbcd3bb99ddd065196"},
-    {file = "openapi_schema_pydantic-1.2.4-py3-none-any.whl", hash = "sha256:a932ecc5dcbb308950282088956e94dea069c9823c84e507d64f6b622222098c"},
-]
-[package.dependencies]
-pydantic = ">=1.8.2"
[[package]]
name = "orjson"
version = "3.9.5"
@@ -3561,13 +3546,13 @@ colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\" and python
[[package]]
name = "weaviate-client"
version = "3.22.1"
version = "3.23.0"
description = "A python native Weaviate client"
optional = false
python-versions = ">=3.8"
files = [
{file = "weaviate-client-3.22.1.tar.gz", hash = "sha256:aff61bd3f5d74df20a62328443e3aa9c860d5330fdfb19c4d8ddc44cb604032f"},
{file = "weaviate_client-3.22.1-py3-none-any.whl", hash = "sha256:01843a4899a227300e570409e77628e9d1b28476313f94943c37aee3f75112e1"},
{file = "weaviate-client-3.23.0.tar.gz", hash = "sha256:3ffd7f1460c9e32755d84d4f5fc63dfc0bd990dbe2c3dc20d5c68119d467680e"},
{file = "weaviate_client-3.23.0-py3-none-any.whl", hash = "sha256:3d3bb75c1d96b2b71e213c5eb885ae3e3f42e4304955383c467d100187d9ff8e"},
]
[package.dependencies]
@@ -3581,13 +3566,13 @@ grpc = ["grpcio", "grpcio-tools"]
[[package]]
name = "wheel"
version = "0.41.1"
version = "0.41.2"
description = "A built-package format for Python"
optional = false
python-versions = ">=3.7"
files = [
{file = "wheel-0.41.1-py3-none-any.whl", hash = "sha256:473219bd4cbedc62cea0cb309089b593e47c15c4a2531015f94e4e3b9a0f6981"},
{file = "wheel-0.41.1.tar.gz", hash = "sha256:12b911f083e876e10c595779709f8a88a59f45aacc646492a67fe9ef796c1b47"},
{file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"},
{file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"},
]
[package.extras]
@@ -3795,4 +3780,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "13258c777467d93ab73021225322da670e42513cedec6252a40aacf74822ea68"
content-hash = "5629225437c5aec01f9f862d46d6d1e68abde4c42a0c1ad709df875883171991"


@@ -8,7 +8,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
#langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"}
langchain = "v0.0.250"
langchain = "v0.0.271"
nltk = "3.8.1"
openai = "0.27.8"
@@ -39,6 +39,7 @@ dlt = { version ="^0.3.8", extras = ["duckdb"]}
weaviate-client = "^3.22.1"
python-multipart = "^0.0.6"
deep-translator = "^1.11.4"
+humanize = "^4.8.0"


@@ -0,0 +1,76 @@
import unittest
import asyncio
import sys

sys.path.append("..")  # Adds the parent directory to the Python module search path.

from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory


class TestMemory(unittest.TestCase):
    def setUp(self):
        self.loop = asyncio.get_event_loop()
        self.memory = Memory(user_id="123")
        self.loop.run_until_complete(self.memory.async_init())

    def test_add_fetch_delete_semantic_memory(self):
        async def semantic_workflow():
            params = {"sample_param": "value"}
            sample_memory = "sample semantic memory"
            # Add
            await self.memory._add_semantic_memory(sample_memory, params=params)
            # Fetch
            fetched = await self.memory._fetch_semantic_memory(sample_memory, params)
            # NOTE: key looks copied from the episodic test; adjust if the
            # semantic store is returned under a different class name.
            fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text']
            self.assertIn(sample_memory, fetched_text)  # Replace this with the appropriate validation
            # Delete
            await self.memory._delete_semantic_memory()
            # Verify deletion
            after_delete = await self.memory._fetch_semantic_memory(sample_memory, params)
            self.assertNotIn(sample_memory, after_delete)  # Replace with the appropriate validation

        self.loop.run_until_complete(semantic_workflow())

    def test_add_fetch_delete_episodic_memory(self):
        async def episodic_workflow():
            params = {"sample_param": "value"}
            sample_memory = """{
                "sample_key": "sample_value"
            }"""
            # Add
            await self.memory._add_episodic_memory(observation=sample_memory, params=params)
            # Fetch
            fetched = await self.memory._fetch_episodic_memory(sample_memory)
            fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text']
            self.assertIn(sample_memory, fetched_text)  # Replace this with the appropriate validation
            # Delete
            await self.memory._delete_episodic_memory()
            # Verify deletion
            after_delete = await self.memory._fetch_episodic_memory(sample_memory)
            self.assertNotIn(sample_memory, after_delete)  # Replace with the appropriate validation

        self.loop.run_until_complete(episodic_workflow())
# def test_add_fetch_delete_buffer_memory(self):
# async def buffer_workflow():
# params = {"sample_param": "value"}
# user_input = "sample buffer input"
# namespace = "sample_namespace"
#
# # Add
# await self.memory._add_buffer_memory(user_input=user_input, namespace=namespace, params=params)
# # Fetch
# fetched = await self.memory._fetch_buffer_memory(user_input, namespace)
# self.assertIn(user_input, fetched) # Replace this with the appropriate validation
# # Delete
# await self.memory._delete_buffer_memory()
# # Verify Deletion
# after_delete = await self.memory._fetch_buffer_memory(user_input, namespace)
# self.assertNotIn(user_input, after_delete) # Replace with the appropriate validation
#
# self.loop.run_until_complete(buffer_workflow())
if __name__ == '__main__':
    unittest.main()
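
As an aside, on Python 3.8+ the same roundtrip can be written without manual event-loop management by using `unittest.IsolatedAsyncioTestCase`. A minimal sketch, assuming the same `Memory` API as the tests above:

```python
import unittest

from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory


class TestMemoryAsync(unittest.IsolatedAsyncioTestCase):
    """Semantic-memory roundtrip without manual event-loop handling."""

    async def asyncSetUp(self):
        self.memory = Memory(user_id="123")
        await self.memory.async_init()

    async def test_semantic_roundtrip(self):
        params = {"sample_param": "value"}
        sample_memory = "sample semantic memory"
        await self.memory._add_semantic_memory(sample_memory, params=params)
        fetched = await self.memory._fetch_semantic_memory(sample_memory, params)
        self.assertTrue(fetched)  # replace with store-specific validation
        await self.memory._delete_semantic_memory()


if __name__ == "__main__":
    unittest.main()
```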