commit
1050647a5f
7 changed files with 935 additions and 705 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
|
@ -1,3 +1,6 @@
|
|||
|
||||
.env
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
|
@ -157,4 +160,5 @@ cython_debug/
|
|||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
#.idea/
|
||||
|
||||
|
|
|
|||
|
|
@ -24,21 +24,43 @@ Initial code lets you do three operations:
|
|||
|
||||
## Usage
|
||||
|
||||
The fast API endpoint accepts prompts and PDF files and returns a JSON object with the generated text.
|
||||
The FastAPI endpoints accept prompts and store data with the help of the Memory Manager.
|
||||
|
||||
The types of memory are: Episodic, Semantic, Buffer.
|
||||
|
||||
Endpoint Overview
|
||||
The Memory API provides the following endpoints:
|
||||
|
||||
- /[memory_type]/add-memory (POST)
|
||||
- /[memory_type]/fetch-memory (POST)
|
||||
- /[memory_type]/delete-memory (POST)
|
||||
- /available-buffer-actions (GET)
|
||||
- /run-buffer (POST)
|
||||
- /buffer/create-context (POST)
|
||||
|
||||
Here is a payload example:
|
||||
|
||||
```curl
|
||||
-X POST
|
||||
-F "prompt=The quick brown fox"
|
||||
-F "file=@/path/to/file.pdf"
|
||||
http://localhost:8000/upload/
|
||||
```
|
||||
|
||||
{
|
||||
"payload": {
|
||||
"user_id": "681",
|
||||
"session_id": "471",
|
||||
"model_speed": "slow",
|
||||
"prompt": "Temperature=Cold;Food Type=Ice Cream",
|
||||
"pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
|
||||
"prompt": "I want ",
|
||||
"pdf_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
|
||||
"params": {
|
||||
"version": "1.0",
|
||||
"agreement_id": "AG123456",
|
||||
"privacy_policy": "https://example.com/privacy",
|
||||
"terms_of_service": "https://example.com/terms",
|
||||
"format": "json",
|
||||
"schema_version": "1.1",
|
||||
"checksum": "a1b2c3d4e5f6",
|
||||
"owner": "John Doe",
|
||||
"license": "MIT",
|
||||
"validity_start": "2023-08-01",
|
||||
"validity_end": "2024-07-31"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
240
level_2/api.py
240
level_2/api.py
|
|
@ -1,3 +1,5 @@
|
|||
from io import BytesIO
|
||||
|
||||
from langchain.document_loaders import PyPDFLoader
|
||||
|
||||
from level_2_pdf_vectorstore__dlt_contracts import Memory
|
||||
|
|
@ -27,7 +29,7 @@ from dotenv import load_dotenv
|
|||
|
||||
|
||||
load_dotenv()
|
||||
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
||||
|
||||
app = FastAPI(debug=True)
|
||||
|
||||
|
|
@ -63,82 +65,73 @@ def health_check():
|
|||
|
||||
#curl -X POST -H "Content-Type: application/json" -d '{"data": "YourPayload"}' -F "files=@/path/to/your/pdf/file.pdf" http://127.0.0.1:8000/upload/
|
||||
|
||||
from fastapi import FastAPI, UploadFile, File
|
||||
import requests
|
||||
import os
|
||||
import json
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
class Payload(BaseModel):
    """Request body wrapper: a free-form JSON object under the ``payload`` key."""

    # Handlers in this module read keys such as 'user_id', 'prompt', 'params'
    # and 'pdf_url' from this dict; no schema is enforced here.
    payload: Dict[str, Any]
|
||||
|
||||
@app.post("/upload/", response_model=dict)
async def upload_pdf_and_payload(
    payload: Payload,
    # files: List[UploadFile] = File(...),
):
    """Download the PDF named in the payload, store it as episodic memory and run the buffer.

    When the payload contains ``pdf_url`` the file is fetched with ``requests``,
    written to ``/tmp/tmp.pdf``, split into pages with ``PyPDFLoader`` and passed
    to ``Memory._add_episodic_memory``; the result of ``Memory._run_buffer`` is
    returned under the ``"response"`` key with status 200.

    Any exception is swallowed and reported as ``{"error": <message>}``.

    NOTE(review): the user id ('555'), index name and prompt are hard-coded; the
    payload's own 'prompt' is not used.
    NOTE(review): when 'pdf_url' is absent nothing is returned — confirm that is
    intended for a route declared with ``response_model=dict``.
    """
    try:
        # Process the payload
        decoded_payload = payload.payload
        # except:
        #     pass
        #
        # return JSONResponse(content={"response": decoded_payload}, status_code=200)

        # Download the remote PDF if URL is provided
        if 'pdf_url' in decoded_payload:
            # NOTE(review): no timeout and no HTTP status check on this request.
            pdf_response = requests.get(decoded_payload['pdf_url'])
            pdf_content = pdf_response.content

            logging.info("Downloaded PDF from URL")

            # Create an in-memory file-like object for the PDF content
            pdf_stream = BytesIO(pdf_content)

            contents = pdf_stream.read()

            # NOTE(review): fixed path — concurrent requests would overwrite
            # each other's file.
            tmp_location = os.path.join('/tmp', "tmp.pdf")
            with open(tmp_location, 'wb') as tmp_file:
                tmp_file.write(contents)

            logging.info("Wrote PDF from URL")

            # Process the PDF using PyPDFLoader
            loader = PyPDFLoader(tmp_location)
            pages = loader.load_and_split()
            logging.info(" PDF split into pages")
            Memory_ = Memory(index_name="my-agent", user_id='555' )
            await Memory_.async_init()
            # NOTE(review): not awaited here, whereas the other handlers in this
            # module await the Memory methods — confirm whether these are coroutines.
            Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content =pages)

            # Run the buffer
            response = Memory_._run_buffer(user_input="I want to get a schema for my data")
            return JSONResponse(content={"response": response}, status_code=200)

            # TODO: add the user id to the payload
            # TODO: add the raw pdf to payload
            # bb = await Memory_._run_buffer(user_input=decoded_payload['prompt'])
            # print(bb)

    except Exception as e:
        # Report the failure in the body instead of raising; the status stays 200.
        return {"error": str(e)}
|
||||
# Here you can perform your processing on the PDF contents
|
||||
# results.append({"filename": file.filename, "size": len(contents)})
|
||||
|
||||
# Append the in-memory file to the files list
|
||||
# files.append(UploadFile(pdf_stream, filename="downloaded.pdf"))
|
||||
|
||||
# @app.post("/upload/", response_model=dict)
|
||||
# async def upload_pdf_and_payload(
|
||||
# payload: Payload,
|
||||
# # files: List[UploadFile] = File(...),
|
||||
# ):
|
||||
# try:
|
||||
# # Process the payload
|
||||
# decoded_payload = payload.payload
|
||||
# # except:
|
||||
# # pass
|
||||
# #
|
||||
# # return JSONResponse(content={"response": decoded_payload}, status_code=200)
|
||||
#
|
||||
# # Download the remote PDF if URL is provided
|
||||
# if 'pdf_url' in decoded_payload:
|
||||
# pdf_response = requests.get(decoded_payload['pdf_url'])
|
||||
# pdf_content = pdf_response.content
|
||||
#
|
||||
# logging.info("Downloaded PDF from URL")
|
||||
#
|
||||
# # Create an in-memory file-like object for the PDF content
|
||||
# pdf_stream = BytesIO(pdf_content)
|
||||
#
|
||||
# contents = pdf_stream.read()
|
||||
#
|
||||
# tmp_location = os.path.join('/tmp', "tmp.pdf")
|
||||
# with open(tmp_location, 'wb') as tmp_file:
|
||||
# tmp_file.write(contents)
|
||||
#
|
||||
# logging.info("Wrote PDF from URL")
|
||||
#
|
||||
# # Process the PDF using PyPDFLoader
|
||||
# loader = PyPDFLoader(tmp_location)
|
||||
# pages = loader.load_and_split()
|
||||
# logging.info(" PDF split into pages")
|
||||
# Memory_ = Memory(index_name="my-agent", user_id='555' )
|
||||
# await Memory_.async_init()
|
||||
# Memory_._add_episodic_memory(user_input="I want to get a schema for my data", content =pages)
|
||||
#
|
||||
#
|
||||
# # Run the buffer
|
||||
# response = Memory_._run_buffer(user_input="I want to get a schema for my data")
|
||||
# return JSONResponse(content={"response": response}, status_code=200)
|
||||
#
|
||||
# #to do: add the user id to the payload
|
||||
# #to do add the raw pdf to payload
|
||||
# # bb = await Memory_._run_buffer(user_input=decoded_payload['prompt'])
|
||||
# # print(bb)
|
||||
#
|
||||
#
|
||||
# except Exception as e:
|
||||
#
|
||||
# return {"error": str(e)}
|
||||
# # Here you can perform your processing on the PDF contents
|
||||
# # results.append({"filename": file.filename, "size": len(contents)})
|
||||
#
|
||||
# # Append the in-memory file to the files list
|
||||
# # files.append(UploadFile(pdf_stream, filename="downloaded.pdf"))
|
||||
#
|
||||
|
||||
|
||||
def memory_factory(memory_type):
|
||||
load_dotenv()
|
||||
class Payload(BaseModel):
|
||||
payload: Dict[str, Any]
|
||||
@app.post("/{memory_type}/add-memory", response_model=dict)
|
||||
|
|
@ -148,23 +141,47 @@ def memory_factory(memory_type):
|
|||
):
|
||||
try:
|
||||
|
||||
logging.info(" Init PDF processing")
|
||||
|
||||
|
||||
decoded_payload = payload.payload
|
||||
|
||||
Memory_ = Memory( user_id='555')
|
||||
if 'pdf_url' in decoded_payload:
|
||||
pdf_response = requests.get(decoded_payload['pdf_url'])
|
||||
pdf_content = pdf_response.content
|
||||
|
||||
await Memory_.async_init()
|
||||
logging.info("Downloaded PDF from URL")
|
||||
|
||||
memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None)
|
||||
output= memory_class(observation=decoded_payload['prompt'])
|
||||
return JSONResponse(content={"response": output}, status_code=200)
|
||||
# Create an in-memory file-like object for the PDF content
|
||||
pdf_stream = BytesIO(pdf_content)
|
||||
|
||||
contents = pdf_stream.read()
|
||||
|
||||
tmp_location = os.path.join('/tmp', "tmp.pdf")
|
||||
with open(tmp_location, 'wb') as tmp_file:
|
||||
tmp_file.write(contents)
|
||||
|
||||
logging.info("Wrote PDF from URL")
|
||||
|
||||
# Process the PDF using PyPDFLoader
|
||||
loader = PyPDFLoader(tmp_location)
|
||||
# pages = loader.load_and_split()
|
||||
logging.info(" PDF split into pages")
|
||||
|
||||
Memory_ = Memory(user_id=decoded_payload['user_id'])
|
||||
|
||||
await Memory_.async_init()
|
||||
|
||||
memory_class = getattr(Memory_, f"_add_{memory_type}_memory", None)
|
||||
output= await memory_class(observation=str(loader), params =decoded_payload['params'])
|
||||
return JSONResponse(content={"response": output}, status_code=200)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
|
||||
|
||||
@app.post("/{memory_type}/fetch-memory", response_model=dict)
|
||||
async def add_memory(
|
||||
async def fetch_memory(
|
||||
payload: Payload,
|
||||
# files: List[UploadFile] = File(...),
|
||||
):
|
||||
|
|
@ -172,7 +189,7 @@ def memory_factory(memory_type):
|
|||
|
||||
decoded_payload = payload.payload
|
||||
|
||||
Memory_ = Memory(user_id='555')
|
||||
Memory_ = Memory(user_id=decoded_payload['user_id'])
|
||||
|
||||
await Memory_.async_init()
|
||||
|
||||
|
|
@ -185,7 +202,7 @@ def memory_factory(memory_type):
|
|||
return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
|
||||
|
||||
@app.post("/{memory_type}/delete-memory", response_model=dict)
|
||||
async def add_memory(
|
||||
async def delete_memory(
|
||||
payload: Payload,
|
||||
# files: List[UploadFile] = File(...),
|
||||
):
|
||||
|
|
@ -193,7 +210,7 @@ def memory_factory(memory_type):
|
|||
|
||||
decoded_payload = payload.payload
|
||||
|
||||
Memory_ = Memory(user_id='555')
|
||||
Memory_ = Memory(user_id=decoded_payload['user_id'])
|
||||
|
||||
await Memory_.async_init()
|
||||
|
||||
|
|
@ -210,6 +227,71 @@ for memory_type in memory_list:
|
|||
memory_factory(memory_type)
|
||||
|
||||
|
||||
|
||||
@app.get("/available-buffer-actions", response_model=dict)
async def available_buffer_actions(
    payload: Payload,
):
    """Return the operations the buffer offers to the requesting user.

    ``user_id`` is read from the request payload, a ``Memory`` is initialised
    for that user, and the result of ``Memory._available_operations()`` is
    returned under the ``"response"`` key with status 200.

    Any exception, including a missing ``user_id``, produces a 503 whose body
    is ``{"response": {"error": <message>}}``.
    """
    try:
        request_data = payload.payload
        user_memory = Memory(user_id=request_data['user_id'])
        await user_memory.async_init()
        operations = await user_memory._available_operations()
        return JSONResponse(content={"response": operations}, status_code=200)
    except Exception as exc:
        error_body = {"response": {"error": str(exc)}}
        return JSONResponse(content=error_body, status_code=503)
|
||||
|
||||
@app.post("/run-buffer", response_model=dict)
async def run_buffer(
    payload: Payload,
    # files: List[UploadFile] = File(...),
):
    """Run the buffer for the requesting user and return its output.

    The handler is named after its route; it previously reused the name of the
    ``/available-buffer-actions`` handler, which rebound that module-level name
    and gave both routes the same generated title in the API docs.

    The request payload must contain ``user_id``, ``prompt`` and ``params``;
    ``prompt`` and ``params`` are forwarded to ``Memory._run_buffer``.

    Any exception, including a missing key, produces a 503 whose body is
    ``{"response": {"error": <message>}}``.
    """
    try:
        decoded_payload = payload.payload

        Memory_ = Memory(user_id=decoded_payload['user_id'])
        await Memory_.async_init()

        output = await Memory_._run_buffer(
            user_input=decoded_payload['prompt'],
            params=decoded_payload['params'],
        )
        return JSONResponse(content={"response": output}, status_code=200)

    except Exception as e:
        return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
|
||||
|
||||
@app.post("/buffer/create-context", response_model=dict)
async def create_buffer_context(
    payload: Payload,
    # files: List[UploadFile] = File(...),
):
    """Build the buffer context for the requesting user and return it.

    The handler is named after its route; it previously reused the name of the
    ``/available-buffer-actions`` handler, which rebound that module-level name
    and gave both routes the same generated title in the API docs.

    The request payload must contain ``user_id``, ``prompt`` and ``params``;
    ``prompt`` and ``params`` are forwarded to ``Memory._create_buffer_context``.

    Any exception, including a missing key, produces a 503 whose body is
    ``{"response": {"error": <message>}}``.
    """
    try:
        decoded_payload = payload.payload

        Memory_ = Memory(user_id=decoded_payload['user_id'])
        await Memory_.async_init()

        output = await Memory_._create_buffer_context(
            user_input=decoded_payload['prompt'],
            params=decoded_payload['params'],
        )
        return JSONResponse(content={"response": output}, status_code=200)

    except Exception as e:
        return JSONResponse(content={"response": {"error": str(e)}}, status_code=503)
|
||||
|
||||
|
||||
#
|
||||
# # Process each uploaded PDF file
|
||||
# results = []
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
73
level_2/poetry.lock
generated
73
level_2/poetry.lock
generated
|
|
@ -261,17 +261,17 @@ numpy = {version = ">=1.19.0", markers = "python_version >= \"3.9\""}
|
|||
|
||||
[[package]]
|
||||
name = "boto3"
|
||||
version = "1.28.30"
|
||||
version = "1.28.32"
|
||||
description = "The AWS SDK for Python"
|
||||
optional = false
|
||||
python-versions = ">= 3.7"
|
||||
files = [
|
||||
{file = "boto3-1.28.30-py3-none-any.whl", hash = "sha256:e095ede98d3680e65966ab71f273b7d86938f5d853773ef96f4cb646277c2a4b"},
|
||||
{file = "boto3-1.28.30.tar.gz", hash = "sha256:2b509a959966a572f15db5768a18066ce1f53022ac53fca9421c620219fa3998"},
|
||||
{file = "boto3-1.28.32-py3-none-any.whl", hash = "sha256:ed787f250ce2562c7744395bdf32b5a7bc9184126ef50a75e97bcb66043dccf3"},
|
||||
{file = "boto3-1.28.32.tar.gz", hash = "sha256:b505faa126db84e226f6f8d242a798fae30a725f0cac8a76c6aca9ace4e8eb28"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
botocore = ">=1.31.30,<1.32.0"
|
||||
botocore = ">=1.31.32,<1.32.0"
|
||||
jmespath = ">=0.7.1,<2.0.0"
|
||||
s3transfer = ">=0.6.0,<0.7.0"
|
||||
|
||||
|
|
@ -280,13 +280,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
|
|||
|
||||
[[package]]
|
||||
name = "botocore"
|
||||
version = "1.31.30"
|
||||
version = "1.31.32"
|
||||
description = "Low-level, data-driven core of boto 3."
|
||||
optional = false
|
||||
python-versions = ">= 3.7"
|
||||
files = [
|
||||
{file = "botocore-1.31.30-py3-none-any.whl", hash = "sha256:269f20dcadd8dfd0c26d0e6fbceb84814ff6638ff3aafcc5324b9fb9949a7051"},
|
||||
{file = "botocore-1.31.30.tar.gz", hash = "sha256:3cf6a9d7621b897c9ff23cd02113826141b3dd3d7e90273b661efc4dc05f84e2"},
|
||||
{file = "botocore-1.31.32-py3-none-any.whl", hash = "sha256:8992ac186988c4b4cc168e8e479e9472da1442b193c1bf7c9dcd1877ec62d23c"},
|
||||
{file = "botocore-1.31.32.tar.gz", hash = "sha256:7a07d8dc8cc47bf23af39409ada81f388eb78233e1bb2cde0c415756da753664"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -1053,13 +1053,13 @@ requests = ">=2.20.0,<3.0"
|
|||
|
||||
[[package]]
|
||||
name = "gptcache"
|
||||
version = "0.1.39.1"
|
||||
version = "0.1.40"
|
||||
description = "GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat applications that rely on the LLM service. GPTCache works as a memcache for AIGC applications, similar to how Redis works for traditional applications."
|
||||
optional = false
|
||||
python-versions = ">=3.8.1"
|
||||
files = [
|
||||
{file = "gptcache-0.1.39.1-py3-none-any.whl", hash = "sha256:81355f7878e12a820dccb017f8a45ea44b73178dac07108c56db664a476a4a07"},
|
||||
{file = "gptcache-0.1.39.1.tar.gz", hash = "sha256:a9c629fdeaa94b78a6cfe707a5f9a3a52b361655a3f01327709ca00c78a500eb"},
|
||||
{file = "gptcache-0.1.40-py3-none-any.whl", hash = "sha256:ba323e5e46b100fa7663b5f4d164cc2aee60f343184ed03ec2d2bb95e9f47c50"},
|
||||
{file = "gptcache-0.1.40.tar.gz", hash = "sha256:5fe4bcf3a45946177cb845b3e1ec01159f10622600e1384b9de0c7c6065d10d5"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -1362,39 +1362,38 @@ files = [
|
|||
|
||||
[[package]]
|
||||
name = "langchain"
|
||||
version = "0.0.250"
|
||||
version = "0.0.271"
|
||||
description = "Building applications with LLMs through composability"
|
||||
optional = false
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
files = [
|
||||
{file = "langchain-0.0.250-py3-none-any.whl", hash = "sha256:65b3520f507e848edd88a35a70700971bbbf822fda65f621ccf44a3bb36ad03a"},
|
||||
{file = "langchain-0.0.250.tar.gz", hash = "sha256:1b5775d6a472f633bb06e794f58cb6ff5d1eeb2da603b64a6a15013f8f61ee3f"},
|
||||
{file = "langchain-0.0.271-py3-none-any.whl", hash = "sha256:3ca68c9cf04edb42ce9225adc65ee739e5e00ed55d08aeb06a47391f3c59018c"},
|
||||
{file = "langchain-0.0.271.tar.gz", hash = "sha256:f79d19405b755608216d1850de4a945a2bceb35c5ca8e4f7a4f9e29a366b097e"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = ">=3.8.3,<4.0.0"
|
||||
async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
|
||||
dataclasses-json = ">=0.5.7,<0.6.0"
|
||||
langsmith = ">=0.0.11,<0.1.0"
|
||||
langsmith = ">=0.0.21,<0.1.0"
|
||||
numexpr = ">=2.8.4,<3.0.0"
|
||||
numpy = ">=1,<2"
|
||||
openapi-schema-pydantic = ">=1.2,<2.0"
|
||||
pydantic = ">=1,<2"
|
||||
PyYAML = ">=5.4.1"
|
||||
pydantic = ">=1,<3"
|
||||
PyYAML = ">=5.3"
|
||||
requests = ">=2,<3"
|
||||
SQLAlchemy = ">=1.4,<3"
|
||||
tenacity = ">=8.1.0,<9.0.0"
|
||||
|
||||
[package.extras]
|
||||
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "anthropic (>=0.3,<0.4)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=0.11.0,<0.12.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", 
"sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)", "xinference (>=0.0.6,<0.0.7)"]
|
||||
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text 
(>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
|
||||
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b6)", "openai (>=0,<1)"]
|
||||
clarifai = ["clarifai (>=9.1.0)"]
|
||||
cohere = ["cohere (>=4,<5)"]
|
||||
docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
|
||||
embeddings = ["sentence-transformers (>=2,<3)"]
|
||||
extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xinference (>=0.0.6,<0.0.7)", "zep-python (>=0.32)"]
|
||||
extended-testing = ["amazon-textract-caller (<2)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
|
||||
javascript = ["esprima (>=4.0.1,<5.0.0)"]
|
||||
llms = ["anthropic (>=0.3,<0.4)", "clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.19)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)", "xinference (>=0.0.6,<0.0.7)"]
|
||||
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
|
||||
openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"]
|
||||
qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"]
|
||||
text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
|
||||
|
|
@ -1415,13 +1414,13 @@ data = ["language-data (>=1.1,<2.0)"]
|
|||
|
||||
[[package]]
|
||||
name = "langsmith"
|
||||
version = "0.0.25"
|
||||
version = "0.0.26"
|
||||
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
|
||||
optional = false
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
files = [
|
||||
{file = "langsmith-0.0.25-py3-none-any.whl", hash = "sha256:d595435ad21fa6077550d7c85472935d1e8241afa042c1e29287d2c95c3ed151"},
|
||||
{file = "langsmith-0.0.25.tar.gz", hash = "sha256:e728c398fc1adaa0ed8abeb21f6a92d7fb19fe3ab49d3911c22b03dfe25935d6"},
|
||||
{file = "langsmith-0.0.26-py3-none-any.whl", hash = "sha256:61c1d4582104d96edde04e1eea1dae347645b691c44489a5871341a2a1a2a1eb"},
|
||||
{file = "langsmith-0.0.26.tar.gz", hash = "sha256:80a4ef1b663a24a460d25b9986ab2010c5d06b6061c65be473abafc0647d191a"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -1875,20 +1874,6 @@ dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-moc
|
|||
embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"]
|
||||
wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"]
|
||||
|
||||
[[package]]
|
||||
name = "openapi-schema-pydantic"
|
||||
version = "1.2.4"
|
||||
description = "OpenAPI (v3) specification schema as pydantic class"
|
||||
optional = false
|
||||
python-versions = ">=3.6.1"
|
||||
files = [
|
||||
{file = "openapi-schema-pydantic-1.2.4.tar.gz", hash = "sha256:3e22cf58b74a69f752cc7e5f1537f6e44164282db2700cbbcd3bb99ddd065196"},
|
||||
{file = "openapi_schema_pydantic-1.2.4-py3-none-any.whl", hash = "sha256:a932ecc5dcbb308950282088956e94dea069c9823c84e507d64f6b622222098c"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pydantic = ">=1.8.2"
|
||||
|
||||
[[package]]
|
||||
name = "orjson"
|
||||
version = "3.9.5"
|
||||
|
|
@ -3561,13 +3546,13 @@ colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\" and python
|
|||
|
||||
[[package]]
|
||||
name = "weaviate-client"
|
||||
version = "3.22.1"
|
||||
version = "3.23.0"
|
||||
description = "A python native Weaviate client"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "weaviate-client-3.22.1.tar.gz", hash = "sha256:aff61bd3f5d74df20a62328443e3aa9c860d5330fdfb19c4d8ddc44cb604032f"},
|
||||
{file = "weaviate_client-3.22.1-py3-none-any.whl", hash = "sha256:01843a4899a227300e570409e77628e9d1b28476313f94943c37aee3f75112e1"},
|
||||
{file = "weaviate-client-3.23.0.tar.gz", hash = "sha256:3ffd7f1460c9e32755d84d4f5fc63dfc0bd990dbe2c3dc20d5c68119d467680e"},
|
||||
{file = "weaviate_client-3.23.0-py3-none-any.whl", hash = "sha256:3d3bb75c1d96b2b71e213c5eb885ae3e3f42e4304955383c467d100187d9ff8e"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -3581,13 +3566,13 @@ grpc = ["grpcio", "grpcio-tools"]
|
|||
|
||||
[[package]]
|
||||
name = "wheel"
|
||||
version = "0.41.1"
|
||||
version = "0.41.2"
|
||||
description = "A built-package format for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "wheel-0.41.1-py3-none-any.whl", hash = "sha256:473219bd4cbedc62cea0cb309089b593e47c15c4a2531015f94e4e3b9a0f6981"},
|
||||
{file = "wheel-0.41.1.tar.gz", hash = "sha256:12b911f083e876e10c595779709f8a88a59f45aacc646492a67fe9ef796c1b47"},
|
||||
{file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"},
|
||||
{file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
|
|
@ -3795,4 +3780,4 @@ multidict = ">=4.0"
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "13258c777467d93ab73021225322da670e42513cedec6252a40aacf74822ea68"
|
||||
content-hash = "5629225437c5aec01f9f862d46d6d1e68abde4c42a0c1ad709df875883171991"
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ readme = "README.md"
|
|||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
#langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"}
|
||||
langchain = "v0.0.250"
|
||||
langchain = "v0.0.271"
|
||||
|
||||
nltk = "3.8.1"
|
||||
openai = "0.27.8"
|
||||
|
|
@ -39,6 +39,7 @@ dlt = { version ="^0.3.8", extras = ["duckdb"]}
|
|||
weaviate-client = "^3.22.1"
|
||||
python-multipart = "^0.0.6"
|
||||
deep-translator = "^1.11.4"
|
||||
humanize = "^4.8.0"
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
76
level_2/tests/crud_test.py
Normal file
76
level_2/tests/crud_test.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
import unittest
|
||||
import asyncio
|
||||
|
||||
import sys
|
||||
sys.path.append("..") # Adds higher directory to python modules path.
|
||||
|
||||
from level_2.level_2_pdf_vectorstore__dlt_contracts import Memory
|
||||
class TestMemory(unittest.TestCase):
    """Add / fetch / delete round-trip tests for the ``Memory`` stores.

    NOTE(review): ``Memory.async_init`` presumably connects to a live vector
    store, which would make these integration tests — confirm before wiring
    them into CI.
    """

    def setUp(self):
        # Use a dedicated loop per test: asyncio.get_event_loop() is deprecated
        # when no loop is running, and a shared loop is never closed.
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        self.addCleanup(self.loop.close)
        self.addCleanup(asyncio.set_event_loop, None)

        self.memory = Memory(user_id="123")
        self.loop.run_until_complete(self.memory.async_init())

    @staticmethod
    def _all_texts(response):
        """Collect the ``text`` of every object found under ``data.Get.*``.

        Missing or empty levels yield an empty list, so the helper can be
        applied to whatever a fetch returns after the memory was deleted.
        """
        if not isinstance(response, dict):
            return []
        classes = (response.get('data') or {}).get('Get') or {}
        return [
            obj.get('text') or ''
            for objects in classes.values()
            for obj in (objects or [])
        ]

    def _assert_deleted(self, sample_memory, response):
        """Fail if any object in *response* still carries *sample_memory*.

        Checking ``sample_memory in response`` directly would only test the
        keys of the response dict and could never fail.
        """
        leftovers = [
            text for text in self._all_texts(response) if sample_memory in text
        ]
        self.assertEqual(leftovers, [])

    def test_add_fetch_delete_semantic_memory(self):
        async def semantic_workflow():
            params = {"sample_param": "value"}
            sample_memory = "sample semantic memory"

            # Add
            await self.memory._add_semantic_memory(sample_memory, params=params)

            # Fetch
            fetched = await self.memory._fetch_semantic_memory(sample_memory, params)
            # NOTE(review): the semantic fetch is read from the 'EPISODICMEMORY'
            # class — confirm this is the class semantic memory is stored under.
            fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text']
            self.assertIn(sample_memory, fetched_text)

            # Delete
            await self.memory._delete_semantic_memory()

            # Verify deletion
            after_delete = await self.memory._fetch_semantic_memory(sample_memory, params)
            self._assert_deleted(sample_memory, after_delete)

        self.loop.run_until_complete(semantic_workflow())

    def test_add_fetch_delete_episodic_memory(self):
        async def episodic_workflow():
            params = {"sample_param": "value"}
            sample_memory = """{
            "sample_key": "sample_value"
            }"""

            # Add
            await self.memory._add_episodic_memory(observation=sample_memory, params=params)

            # Fetch
            fetched = await self.memory._fetch_episodic_memory(sample_memory)
            fetched_text = fetched['data']['Get']['EPISODICMEMORY'][0]['text']
            self.assertIn(sample_memory, fetched_text)

            # Delete
            await self.memory._delete_episodic_memory()

            # Verify deletion
            after_delete = await self.memory._fetch_episodic_memory(sample_memory)
            self._assert_deleted(sample_memory, after_delete)

        self.loop.run_until_complete(episodic_workflow())
|
||||
|
||||
# def test_add_fetch_delete_buffer_memory(self):
|
||||
# async def buffer_workflow():
|
||||
# params = {"sample_param": "value"}
|
||||
# user_input = "sample buffer input"
|
||||
# namespace = "sample_namespace"
|
||||
#
|
||||
# # Add
|
||||
# await self.memory._add_buffer_memory(user_input=user_input, namespace=namespace, params=params)
|
||||
# # Fetch
|
||||
# fetched = await self.memory._fetch_buffer_memory(user_input, namespace)
|
||||
# self.assertIn(user_input, fetched) # Replace this with the appropriate validation
|
||||
# # Delete
|
||||
# await self.memory._delete_buffer_memory()
|
||||
# # Verify Deletion
|
||||
# after_delete = await self.memory._fetch_buffer_memory(user_input, namespace)
|
||||
# self.assertNotIn(user_input, after_delete) # Replace with the appropriate validation
|
||||
#
|
||||
# self.loop.run_until_complete(buffer_workflow())
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
|
||||
Loading…
Add table
Reference in a new issue