Resolve some of the PR comments

Vasilije 2023-10-05 17:15:45 +02:00
parent c9bfe1752d
commit 44c595d929
11 changed files with 448 additions and 114 deletions

View file

@@ -7,7 +7,7 @@ from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from level_2_pdf_vectorstore__dlt_contracts import Memory
from vectorstore_manager import Memory
from dotenv import load_dotenv
# Set up logging
logging.basicConfig(

View file

@@ -20,14 +20,13 @@ def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_ove
return chunked_data
def vanilla_chunker(source_data, chunk_size, chunk_overlap):
# loader = PyPDFLoader(source_data)
def vanilla_chunker(source_data, chunk_size=100, chunk_overlap=20):
# adapt this for different chunking strategies
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
# Set a really small chunk size, just to show.
chunk_size=100,
chunk_overlap=20,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
length_function=len
)
pages = text_splitter.create_documents([source_data])
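
For reference, a minimal sketch of the chunker as this hunk leaves it, with the hard-coded sizes replaced by the new keyword defaults (the chunk_data dispatcher above is assumed to pass both values through):

from langchain.text_splitter import RecursiveCharacterTextSplitter

def vanilla_chunker(source_data, chunk_size=100, chunk_overlap=20):
    # split raw text into overlapping chunks; callers can now tune both knobs
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.create_documents([source_data])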

View file

@@ -0,0 +1,13 @@
from setuptools import setup, find_packages
setup(
name='cognitive_memory',
version='0.0.1',
description='Library for cognitive memory in VectorDBs with RAG test framework',
author='Vasilije Markovic',
author_email='vasilije@topoteretes.com',
packages=find_packages(),
install_requires=[
# List your dependencies here
],
)
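
install_requires is left as a placeholder here. If it is meant to mirror the Poetry manifest updated later in this commit, a hedged guess at the core pins would be:

install_requires=[
    # assumed from level_3/pyproject.toml in this same commit; verify before publishing
    'langchain==0.0.303',
    'deepeval>=0.20.0,<0.21.0',
    'llama-index>=0.8.39',
    'llama-hub>=0.0.34',
],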

View file

@@ -5,12 +5,16 @@ import fitz
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from chunkers.chunkers import chunk_data
from llama_hub.file.base import SimpleDirectoryReader
from langchain.document_loaders import PyPDFLoader
import requests
def _document_loader( observation: str, loader_settings: dict):
# Check the format of the document
document_format = loader_settings.get("format", "text")
loader_strategy = loader_settings.get("strategy", "VANILLA")
chunk_size = loader_settings.get("chunk_size", 100)
chunk_overlap = loader_settings.get("chunk_overlap", 20)
if document_format == "PDF":
if loader_settings.get("source") == "url":
@@ -20,20 +24,19 @@ def _document_loader( observation: str, loader_settings: dict):
file_content = ""
for page in doc:
file_content += page.get_text()
pages = chunk_data(chunk_strategy= 'VANILLA', source_data=file_content)
pages = chunk_data(chunk_strategy= loader_strategy, source_data=file_content, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
return pages
elif loader_settings.get("source") == "file":
# Process the PDF using PyPDFLoader
# might need adapting for different loaders + OCR
# need to test the path
loader = PyPDFLoader(loader_settings["path"])
pages = loader.load_and_split()
loader = SimpleDirectoryReader('./data', recursive=True, exclude_hidden=True)
documents = loader.load_data()
pages = documents.load_and_split()
return pages
elif document_format == "text":
# Process the text directly
return observation
pages = chunk_data(chunk_strategy= loader_strategy, source_data=observation, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
return pages
else:
raise ValueError(f"Unsupported document format: {document_format}")
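
One caveat in the new file branch: SimpleDirectoryReader.load_data() returns a plain list of llama-index Document objects, and a list has no load_and_split() method, so pages = documents.load_and_split() would raise AttributeError. A sketch of what this branch likely intends, assuming each Document exposes its body as .text; note the branch also hard-codes './data' instead of using loader_settings["path"], which the removed PyPDFLoader call relied on:

loader = SimpleDirectoryReader('./data', recursive=True, exclude_hidden=True)
documents = loader.load_data()
file_content = "".join(doc.text for doc in documents)  # flatten documents to raw text
pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content,
                   chunk_size=chunk_size, chunk_overlap=chunk_overlap)
return pages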

View file

@@ -13,6 +13,7 @@ class TestSet(Base):
id = Column(String, primary_key=True)
user_id = Column(String, ForeignKey('users.id'), index=True)
content = Column(String, ForeignKey('users.id'), index=True)
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, onupdate=datetime.utcnow)
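
The new content column repeats ForeignKey('users.id') from the user_id line above it, which looks like a copy-paste slip: free-form test content has no reason to reference the users table. A hedged sketch of the likely intent:

from datetime import datetime
from sqlalchemy import Column, DateTime, ForeignKey, String

class TestSet(Base):
    __tablename__ = 'test_sets'  # hypothetical; not shown in this hunk
    id = Column(String, primary_key=True)
    user_id = Column(String, ForeignKey('users.id'), index=True)
    content = Column(String)  # plain text payload, no foreign key
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, onupdate=datetime.utcnow)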

level_3/poetry.lock (generated, 323 changed lines)
View file

@@ -169,6 +169,27 @@ files = [
{file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
]
[[package]]
name = "atlassian-python-api"
version = "3.41.2"
description = "Python Atlassian REST API Wrapper"
optional = false
python-versions = "*"
files = [
{file = "atlassian-python-api-3.41.2.tar.gz", hash = "sha256:a2022977da5a395412ace8e29c2c541312f07d45fc750435dec036af53daceda"},
{file = "atlassian_python_api-3.41.2-py3-none-any.whl", hash = "sha256:27c2361a22ee8cc69988f67a591488cbfce09e5f284da000011af11944d2bc96"},
]
[package.dependencies]
deprecated = "*"
oauthlib = "*"
requests = "*"
requests-oauthlib = "*"
six = "*"
[package.extras]
kerberos = ["requests-kerberos"]
[[package]]
name = "attrs"
version = "23.1.0"
@@ -664,17 +685,19 @@ pdf = ["pypdf (>=3.3.0,<4.0.0)"]
[[package]]
name = "deepeval"
version = "0.10.12"
description = "Deep eval provides evaluation platform to accelerate development of LLMs and Agents"
version = "0.20.0"
description = "DeepEval provides evaluation and unit testing to accelerate development of LLMs and Agents."
optional = false
python-versions = "*"
files = [
{file = "deepeval-0.10.12-py3-none-any.whl", hash = "sha256:239eb720e8a205afab1ae2425e483177bd76cde658bdac98658a6559bdba4f3f"},
{file = "deepeval-0.10.12.tar.gz", hash = "sha256:80968d57a9da6c4fce6247d31ebf7fea228c76393e0d985804be68b722090732"},
{file = "deepeval-0.20.0-py3-none-any.whl", hash = "sha256:81b73d0742974b6ee516c26d8235f3aa62dca765893d41f0eddd870507f70373"},
{file = "deepeval-0.20.0.tar.gz", hash = "sha256:0e7ec2bbe69b03f9b5f21e5b285559363c2a84a9df25b1f7a278091f31fe7049"},
]
[package.dependencies]
pandas = "*"
protobuf = "<=3.20.5"
pydantic = "*"
pytest = "*"
requests = "*"
rich = "*"
@@ -686,6 +709,7 @@ typer = "*"
[package.extras]
bias = ["Dbias", "tensorflow"]
dev = ["black"]
toxic = ["detoxify"]
[[package]]
@@ -1233,6 +1257,17 @@ doc = ["sphinx (>=5.0.0)", "sphinx-rtd-theme (>=1.0.0)", "towncrier (>=21,<22)"]
lint = ["black (>=22)", "flake8 (==6.0.0)", "flake8-bugbear (==23.3.23)", "isort (>=5.10.1)", "mypy (==0.971)", "pydocstyle (>=5.0.0)"]
test = ["eth-utils (>=1.0.1,<3)", "hypothesis (>=3.44.24,<=6.31.6)", "pytest (>=7.0.0)", "pytest-xdist (>=2.4.0)"]
[[package]]
name = "html2text"
version = "2020.1.16"
description = "Turn HTML into equivalent Markdown-structured text."
optional = false
python-versions = ">=3.5"
files = [
{file = "html2text-2020.1.16-py3-none-any.whl", hash = "sha256:c7c629882da0cf377d66f073329ccf34a12ed2adf0169b9285ae4e63ef54c82b"},
{file = "html2text-2020.1.16.tar.gz", hash = "sha256:e296318e16b059ddb97f7a8a1d6a5c1d7af4544049a01e261731d2d5cc277bbb"},
]
[[package]]
name = "httpcore"
version = "0.17.3"
@@ -1439,20 +1474,22 @@ files = [
[[package]]
name = "langchain"
version = "0.0.271"
version = "0.0.303"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain-0.0.271-py3-none-any.whl", hash = "sha256:3ca68c9cf04edb42ce9225adc65ee739e5e00ed55d08aeb06a47391f3c59018c"},
{file = "langchain-0.0.271.tar.gz", hash = "sha256:f79d19405b755608216d1850de4a945a2bceb35c5ca8e4f7a4f9e29a366b097e"},
{file = "langchain-0.0.303-py3-none-any.whl", hash = "sha256:1745961f66b60bc3b513820a34c560dd37c4ba4b7499ba82545dc4816d0133bd"},
{file = "langchain-0.0.303.tar.gz", hash = "sha256:84d2727eb8b3b27a9d0aa0da9f05408c2564a4a923c7d5b154a16e488430e725"},
]
[package.dependencies]
aiohttp = ">=3.8.3,<4.0.0"
anyio = "<4.0"
async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
dataclasses-json = ">=0.5.7,<0.6.0"
langsmith = ">=0.0.21,<0.1.0"
dataclasses-json = ">=0.5.7,<0.7"
jsonpatch = ">=1.33,<2.0"
langsmith = ">=0.0.38,<0.1.0"
numexpr = ">=2.8.4,<3.0.0"
numpy = ">=1,<2"
pydantic = ">=1,<3"
@@ -1463,12 +1500,12 @@ tenacity = ">=8.1.0,<9.0.0"
[package.extras]
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b6)", "openai (>=0,<1)"]
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (>=0,<1)"]
clarifai = ["clarifai (>=9.1.0)"]
cohere = ["cohere (>=4,<5)"]
docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
embeddings = ["sentence-transformers (>=2,<3)"]
extended-testing = ["amazon-textract-caller (<2)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.0.7,<0.0.8)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
extended-testing = ["amazon-textract-caller (<2)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
javascript = ["esprima (>=4.0.1,<5.0.0)"]
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"]
@@ -1491,19 +1528,65 @@ data = ["language-data (>=1.1,<2.0)"]
[[package]]
name = "langsmith"
version = "0.0.28"
version = "0.0.42"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langsmith-0.0.28-py3-none-any.whl", hash = "sha256:f398782f41526c74e141e68fa28b9020e0be4bde18a1d4a76b357c8272fb81bd"},
{file = "langsmith-0.0.28.tar.gz", hash = "sha256:34c15f9a8908be180001c58048b659ece6320d0bf8ffce4ca496a2428b35646e"},
{file = "langsmith-0.0.42-py3-none-any.whl", hash = "sha256:e10a5084bdd71735a00e91850d4a293b6206825834027676d76fec8d0d044d0a"},
{file = "langsmith-0.0.42.tar.gz", hash = "sha256:66fec6bce07cd18c8d9a7b9d7be216de5f7a93790c2f4cf37efb6956f9fffbf6"},
]
[package.dependencies]
pydantic = ">=1,<3"
requests = ">=2,<3"
[[package]]
name = "llama-hub"
version = "0.0.34"
description = "A library of community-driven data loaders for LLMs. Use with LlamaIndex and/or LangChain. "
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "llama_hub-0.0.34-py3-none-any.whl", hash = "sha256:0f73abbd7a6ceed9d57ff803fa1149f848f3e8ea80882dfe7b6f715237595943"},
{file = "llama_hub-0.0.34.tar.gz", hash = "sha256:85569173d2d6004b7bc8dc095e7f9760558e161d4475e968a86d3b4406e992fc"},
]
[package.dependencies]
atlassian-python-api = "*"
html2text = "*"
llama-index = ">=0.6.9"
psutil = "*"
retrying = "*"
[[package]]
name = "llama-index"
version = "0.8.39.post2"
description = "Interface between LLMs and your data"
optional = false
python-versions = "*"
files = [
{file = "llama_index-0.8.39.post2-py3-none-any.whl", hash = "sha256:52fd490a14dada49270a746b8efc7874ab2a98265a61b46678e62f1bb89a0a9d"},
{file = "llama_index-0.8.39.post2.tar.gz", hash = "sha256:3145b15a6330c7c08cedbd60dcfad19b8d40553d4a0da1da248ead113c67d8a4"},
]
[package.dependencies]
beautifulsoup4 = "*"
dataclasses-json = "*"
fsspec = ">=2023.5.0"
langchain = ">=0.0.303"
nest-asyncio = "*"
nltk = "*"
numpy = "*"
openai = ">=0.26.4"
pandas = "*"
sqlalchemy = ">=2.0.15"
tenacity = ">=8.2.0,<9.0.0"
tiktoken = "*"
typing-extensions = ">=4.5.0"
typing-inspect = ">=0.8.0"
urllib3 = "<2"
[[package]]
name = "loguru"
version = "0.7.0"
@@ -1845,6 +1928,17 @@ files = [
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
]
[[package]]
name = "nest-asyncio"
version = "1.5.8"
description = "Patch asyncio to allow nested event loops"
optional = false
python-versions = ">=3.5"
files = [
{file = "nest_asyncio-1.5.8-py3-none-any.whl", hash = "sha256:accda7a339a70599cb08f9dd09a67e0c2ef8d8d6f4c07f96ab203f2ae254e48d"},
{file = "nest_asyncio-1.5.8.tar.gz", hash = "sha256:25aa2ca0d2a5b5531956b9e273b45cf664cae2b145101d73b86b199978d48fdb"},
]
[[package]]
name = "networkx"
version = "3.1"
@@ -1964,6 +2058,22 @@ files = [
{file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
]
[[package]]
name = "oauthlib"
version = "3.2.2"
description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic"
optional = false
python-versions = ">=3.6"
files = [
{file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"},
{file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"},
]
[package.extras]
rsa = ["cryptography (>=3.0.0)"]
signals = ["blinker (>=1.4.0)"]
signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
[[package]]
name = "openai"
version = "0.27.8"
@@ -2080,6 +2190,131 @@ files = [
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
]
[[package]]
name = "pandas"
version = "2.1.0"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
python-versions = ">=3.9"
files = [
{file = "pandas-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40dd20439ff94f1b2ed55b393ecee9cb6f3b08104c2c40b0cb7186a2f0046242"},
{file = "pandas-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4f38e4fedeba580285eaac7ede4f686c6701a9e618d8a857b138a126d067f2f"},
{file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e6a0fe052cf27ceb29be9429428b4918f3740e37ff185658f40d8702f0b3e09"},
{file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d81e1813191070440d4c7a413cb673052b3b4a984ffd86b8dd468c45742d3cc"},
{file = "pandas-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eb20252720b1cc1b7d0b2879ffc7e0542dd568f24d7c4b2347cb035206936421"},
{file = "pandas-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:38f74ef7ebc0ffb43b3d633e23d74882bce7e27bfa09607f3c5d3e03ffd9a4a5"},
{file = "pandas-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cda72cc8c4761c8f1d97b169661f23a86b16fdb240bdc341173aee17e4d6cedd"},
{file = "pandas-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d97daeac0db8c993420b10da4f5f5b39b01fc9ca689a17844e07c0a35ac96b4b"},
{file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8c58b1113892e0c8078f006a167cc210a92bdae23322bb4614f2f0b7a4b510f"},
{file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:629124923bcf798965b054a540f9ccdfd60f71361255c81fa1ecd94a904b9dd3"},
{file = "pandas-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:70cf866af3ab346a10debba8ea78077cf3a8cd14bd5e4bed3d41555a3280041c"},
{file = "pandas-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d53c8c1001f6a192ff1de1efe03b31a423d0eee2e9e855e69d004308e046e694"},
{file = "pandas-2.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:86f100b3876b8c6d1a2c66207288ead435dc71041ee4aea789e55ef0e06408cb"},
{file = "pandas-2.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28f330845ad21c11db51e02d8d69acc9035edfd1116926ff7245c7215db57957"},
{file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9a6ccf0963db88f9b12df6720e55f337447aea217f426a22d71f4213a3099a6"},
{file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99e678180bc59b0c9443314297bddce4ad35727a1a2656dbe585fd78710b3b9"},
{file = "pandas-2.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b31da36d376d50a1a492efb18097b9101bdbd8b3fbb3f49006e02d4495d4c644"},
{file = "pandas-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0164b85937707ec7f70b34a6c3a578dbf0f50787f910f21ca3b26a7fd3363437"},
{file = "pandas-2.1.0.tar.gz", hash = "sha256:62c24c7fc59e42b775ce0679cfa7b14a5f9bfb7643cfbe708c960699e05fb918"},
]
[package.dependencies]
numpy = {version = ">=1.23.2", markers = "python_version >= \"3.11\""}
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
tzdata = ">=2022.1"
[package.extras]
all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"]
aws = ["s3fs (>=2022.05.0)"]
clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"]
compression = ["zstandard (>=0.17.0)"]
computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"]
consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"]
feather = ["pyarrow (>=7.0.0)"]
fss = ["fsspec (>=2022.05.0)"]
gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"]
hdf5 = ["tables (>=3.7.0)"]
html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"]
mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"]
output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"]
parquet = ["pyarrow (>=7.0.0)"]
performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"]
plot = ["matplotlib (>=3.6.1)"]
postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"]
spss = ["pyreadstat (>=1.1.5)"]
sql-other = ["SQLAlchemy (>=1.4.36)"]
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
xml = ["lxml (>=4.8.0)"]
[[package]]
name = "pandas"
version = "2.1.1"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
python-versions = ">=3.9"
files = [
{file = "pandas-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58d997dbee0d4b64f3cb881a24f918b5f25dd64ddf31f467bb9b67ae4c63a1e4"},
{file = "pandas-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02304e11582c5d090e5a52aec726f31fe3f42895d6bfc1f28738f9b64b6f0614"},
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffa8f0966de2c22de408d0e322db2faed6f6e74265aa0856f3824813cf124363"},
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1f84c144dee086fe4f04a472b5cd51e680f061adf75c1ae4fc3a9275560f8f4"},
{file = "pandas-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:75ce97667d06d69396d72be074f0556698c7f662029322027c226fd7a26965cb"},
{file = "pandas-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:4c3f32fd7c4dccd035f71734df39231ac1a6ff95e8bdab8d891167197b7018d2"},
{file = "pandas-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e2959720b70e106bb1d8b6eadd8ecd7c8e99ccdbe03ee03260877184bb2877d"},
{file = "pandas-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25e8474a8eb258e391e30c288eecec565bfed3e026f312b0cbd709a63906b6f8"},
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8bd1685556f3374520466998929bade3076aeae77c3e67ada5ed2b90b4de7f0"},
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc3657869c7902810f32bd072f0740487f9e030c1a3ab03e0af093db35a9d14e"},
{file = "pandas-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:05674536bd477af36aa2effd4ec8f71b92234ce0cc174de34fd21e2ee99adbc2"},
{file = "pandas-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:b407381258a667df49d58a1b637be33e514b07f9285feb27769cedb3ab3d0b3a"},
{file = "pandas-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c747793c4e9dcece7bb20156179529898abf505fe32cb40c4052107a3c620b49"},
{file = "pandas-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3bcad1e6fb34b727b016775bea407311f7721db87e5b409e6542f4546a4951ea"},
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5ec7740f9ccb90aec64edd71434711f58ee0ea7f5ed4ac48be11cfa9abf7317"},
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29deb61de5a8a93bdd033df328441a79fcf8dd3c12d5ed0b41a395eef9cd76f0"},
{file = "pandas-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f99bebf19b7e03cf80a4e770a3e65eee9dd4e2679039f542d7c1ace7b7b1daa"},
{file = "pandas-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:84e7e910096416adec68075dc87b986ff202920fb8704e6d9c8c9897fe7332d6"},
{file = "pandas-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366da7b0e540d1b908886d4feb3d951f2f1e572e655c1160f5fde28ad4abb750"},
{file = "pandas-2.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e50e72b667415a816ac27dfcfe686dc5a0b02202e06196b943d54c4f9c7693e"},
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc1ab6a25da197f03ebe6d8fa17273126120874386b4ac11c1d687df288542dd"},
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0dbfea0dd3901ad4ce2306575c54348d98499c95be01b8d885a2737fe4d7a98"},
{file = "pandas-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0489b0e6aa3d907e909aef92975edae89b1ee1654db5eafb9be633b0124abe97"},
{file = "pandas-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:4cdb0fab0400c2cb46dafcf1a0fe084c8bb2480a1fa8d81e19d15e12e6d4ded2"},
{file = "pandas-2.1.1.tar.gz", hash = "sha256:fecb198dc389429be557cde50a2d46da8434a17fe37d7d41ff102e3987fd947b"},
]
[package.dependencies]
numpy = [
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
tzdata = ">=2022.1"
[package.extras]
all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"]
aws = ["s3fs (>=2022.05.0)"]
clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"]
compression = ["zstandard (>=0.17.0)"]
computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"]
consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"]
feather = ["pyarrow (>=7.0.0)"]
fss = ["fsspec (>=2022.05.0)"]
gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"]
hdf5 = ["tables (>=3.7.0)"]
html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"]
mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"]
output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"]
parquet = ["pyarrow (>=7.0.0)"]
performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"]
plot = ["matplotlib (>=3.6.1)"]
postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"]
spss = ["pyreadstat (>=1.1.5)"]
sql-other = ["SQLAlchemy (>=1.4.36)"]
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
xml = ["lxml (>=4.8.0)"]
[[package]]
name = "pathvalidate"
version = "3.1.0"
@@ -2377,6 +2612,32 @@ files = [
{file = "protobuf-3.20.3.tar.gz", hash = "sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2"},
]
[[package]]
name = "psutil"
version = "5.9.5"
description = "Cross-platform lib for process and system monitoring in Python."
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
{file = "psutil-5.9.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:be8929ce4313f9f8146caad4272f6abb8bf99fc6cf59344a3167ecd74f4f203f"},
{file = "psutil-5.9.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ab8ed1a1d77c95453db1ae00a3f9c50227ebd955437bcf2a574ba8adbf6a74d5"},
{file = "psutil-5.9.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:4aef137f3345082a3d3232187aeb4ac4ef959ba3d7c10c33dd73763fbc063da4"},
{file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ea8518d152174e1249c4f2a1c89e3e6065941df2fa13a1ab45327716a23c2b48"},
{file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:acf2aef9391710afded549ff602b5887d7a2349831ae4c26be7c807c0a39fac4"},
{file = "psutil-5.9.5-cp27-none-win32.whl", hash = "sha256:5b9b8cb93f507e8dbaf22af6a2fd0ccbe8244bf30b1baad6b3954e935157ae3f"},
{file = "psutil-5.9.5-cp27-none-win_amd64.whl", hash = "sha256:8c5f7c5a052d1d567db4ddd231a9d27a74e8e4a9c3f44b1032762bd7b9fdcd42"},
{file = "psutil-5.9.5-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:3c6f686f4225553615612f6d9bc21f1c0e305f75d7d8454f9b46e901778e7217"},
{file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a7dd9997128a0d928ed4fb2c2d57e5102bb6089027939f3b722f3a210f9a8da"},
{file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89518112647f1276b03ca97b65cc7f64ca587b1eb0278383017c2a0dcc26cbe4"},
{file = "psutil-5.9.5-cp36-abi3-win32.whl", hash = "sha256:104a5cc0e31baa2bcf67900be36acde157756b9c44017b86b2c049f11957887d"},
{file = "psutil-5.9.5-cp36-abi3-win_amd64.whl", hash = "sha256:b258c0c1c9d145a1d5ceffab1134441c4c5113b2417fafff7315a917a026c3c9"},
{file = "psutil-5.9.5-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:c607bb3b57dc779d55e1554846352b4e358c10fff3abf3514a7a6601beebdb30"},
{file = "psutil-5.9.5.tar.gz", hash = "sha256:5410638e4df39c54d957fc51ce03048acd8e6d60abc0f5107af51e5fb566eb3c"},
]
[package.extras]
test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
[[package]]
name = "psycopg2"
version = "2.9.8"
@@ -2953,6 +3214,24 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "requests-oauthlib"
version = "1.3.1"
description = "OAuthlib authentication support for Requests."
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
{file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"},
{file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"},
]
[package.dependencies]
oauthlib = ">=3.0.0"
requests = ">=2.0.0"
[package.extras]
rsa = ["oauthlib[signedtoken] (>=3.0.0)"]
[[package]]
name = "requirements-parser"
version = "0.5.0"
@@ -2967,6 +3246,20 @@ files = [
[package.dependencies]
types-setuptools = ">=57.0.0"
[[package]]
name = "retrying"
version = "1.3.4"
description = "Retrying"
optional = false
python-versions = "*"
files = [
{file = "retrying-1.3.4-py3-none-any.whl", hash = "sha256:8cc4d43cb8e1125e0ff3344e9de678fefd85db3b750b81b2240dc0183af37b35"},
{file = "retrying-1.3.4.tar.gz", hash = "sha256:345da8c5765bd982b1d1915deb9102fd3d1f7ad16bd84a9700b85f64d24e8f3e"},
]
[package.dependencies]
six = ">=1.7.0"
[[package]]
name = "rich"
version = "13.5.2"
@@ -4538,4 +4831,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "37e192953f55c48139ec58d83cb0cc7c6724b56c0c4e191d4322c97aed6f079f"
content-hash = "90d5e6dc901e0ee6324452b378fd92b03872c4d63968379743247224c217a12c"

View file

@@ -8,7 +8,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
#langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"}
langchain = "v0.0.271"
langchain = "v0.0.303"
nltk = "3.8.1"
openai = "0.27.8"
@@ -40,9 +40,11 @@ weaviate-client = "^3.22.1"
python-multipart = "^0.0.6"
deep-translator = "^1.11.4"
humanize = "^4.8.0"
deepeval = "^0.10.12"
deepeval = "^0.20.0"
pymupdf = "^1.23.3"
psycopg2 = "^2.9.8"
llama-index = "^0.8.39.post2"
llama-hub = "^0.0.34"
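
The langchain bump to 0.0.303 is effectively forced by the new llama-index dependency, whose lock entry above requires langchain >=0.0.303. A quick standard-library sanity check that the resolved environment matches (hypothetical snippet; run inside the Poetry virtualenv):

from importlib.metadata import version

for pkg in ("langchain", "deepeval", "llama-index", "llama-hub"):
    print(pkg, version(pkg))  # should match the versions pinned in poetry.lock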

level_3/rag_test_manager.py (new file, 106 lines)
View file

@@ -0,0 +1,106 @@
from deepeval.metrics.overall_score import OverallScoreMetric
from deepeval.test_case import LLMTestCase
from deepeval.run_test import assert_test, run_test
import uuid
def retrieve_test_cases():
"""Retrieve test cases from a database or a file."""
pass
def check_params(chunk_size, chunk_overlap, chunk_strategy, loader_strategy, query, context, metadata):
"""Check parameters for test case runs and set defaults if necessary."""
pass
def run_load(test_id, document, **kwargs):
"""Run load for the given test_id and document with other parameters."""
pass
def compare_output(output, expected_output):
"""Compare the output against the expected output."""
pass
def generate_param_variants(base_params):
"""Generate parameter variants for testing."""
params_variants = [
{'chunk_size': base_params['chunk_size'] + i} for i in range(1, 4)
] + [
{'chunk_overlap': base_params['chunk_overlap'] + i} for i in range(1, 4)
]
# Add more parameter variations here as needed
return params_variants
def run_tests_with_variants(document, base_params, param_variants, expected_output):
"""Run tests with various parameter variants and validate the output."""
for variant in param_variants:
test_id = str(uuid.uuid4()) # Set new test id
updated_params = {**base_params, **variant} # Update parameters
output = run_load(test_id, document, **updated_params) # Run load with varied parameters
compare_output(output, expected_output) # Validate output
def run_rag_tests(document, chunk_size, chunk_overlap, chunk_strategy, loader_strategy, query, output, expected_output,
context, metadata):
"""Run RAG tests with various scenarios and parameter variants."""
test_cases = retrieve_test_cases() # Retrieve test cases
# Check and set parameters
base_params = check_params(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
chunk_strategy=chunk_strategy,
loader_strategy=loader_strategy,
query=query,
context=context,
metadata=metadata
)
# Set test id and run initial load test
test_id = str(uuid.uuid4())
output = run_load(test_id, document, **base_params)
compare_output(output, expected_output)
# Generate parameter variants for further tests
param_variants = generate_param_variants(base_params)
# Run tests with varied parameters for the single document
run_tests_with_variants(document, base_params, param_variants, expected_output)
# Assuming two documents are concatenated and treated as one
combined_document = document + document
# Run initial load test for combined document
output = run_load(test_id, combined_document, **base_params)
compare_output(output, expected_output)
# Run tests with varied parameters for the combined document
run_tests_with_variants(combined_document, base_params, param_variants, expected_output)
def test_0():
query = "How does photosynthesis work?"
output = "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize foods with the help of chlorophyll pigment."
expected_output = "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize food with the help of chlorophyll pigment."
context = "Biology"
test_case = LLMTestCase(
query=query,
output=output,
expected_output=expected_output,
context=context,
)
metric = OverallScoreMetric()
# assert_test raises an AssertionError if the test case fails
assert_test(test_case, metrics=[metric])
# If you want to run the test
test_result = run_test(test_case, metrics=[metric])
# You can also inspect the test result class
print(test_result)
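
Most helpers above are still stubs (retrieve_test_cases, check_params, run_load, and compare_output all just pass), so the module cannot run end to end yet; in particular, check_params returns None, so the **base_params expansion in run_rag_tests would fail until it returns a dict. For illustration only, a hypothetical invocation wired to the defaults used elsewhere in this PR; every literal below is a placeholder:

if __name__ == "__main__":
    sample_document = "Photosynthesis converts sunlight into chemical energy."  # placeholder
    run_rag_tests(
        document=sample_document,
        chunk_size=100,
        chunk_overlap=20,
        chunk_strategy="VANILLA",
        loader_strategy="VANILLA",
        query="How does photosynthesis work?",
        output=None,            # produced by run_load once implemented
        expected_output="...",  # placeholder ground truth
        context="Biology",
        metadata=None,
    )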

View file

@@ -116,46 +116,8 @@ class BaseMemory:
# Create a Schema instance with the dynamic fields
dynamic_schema_instance = Schema.from_dict(dynamic_fields)()
return dynamic_schema_instance
async def convert_database_schema_to_marshmallow(self, memory_id, user_id):
Session = sessionmaker(bind=engine)
session = Session()
# Fetch schema version and fields from PostgreSQL
schema_metadata = session.query(MetaDatas.contract_metadata).where(MetaDatas.memory_id == memory_id).where(MetaDatas.user_id == user_id).first()
if not schema_metadata:
raise ValueError("Schema not found in database")
schema_metadata = schema_metadata[0].replace("'", '"')
print("schema_metadata: ", schema_metadata)
schema_fields = json.loads(schema_metadata)
print("schema_FIELDS: ", schema_fields)
# Dynamically create and return marshmallow schema
# if isinstance(field_props, dict) and 'type' in field_props:
# field_type = field_props['type']
# required = field_props.get('required', False)
# default = field_props.get('default', None)
# else:
# # Default to string type if field_props is not a dict or doesn't contain type
# field_type = "Str"
# required = False
# default = None
#
# setattr(DynamicSchema, field_name,
# self.create_field(
# field_type,
# required=required,
# default=default
# )
# )
return DynamicSchema
async def get_version_from_db(self, user_id, memory_id):
# Logic to retrieve the version from the database.
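
The deleted helper relied on marshmallow's Schema.from_dict, which the surviving create_dynamic_schema path still uses (the Schema.from_dict(dynamic_fields)() call at the top of this hunk). A minimal self-contained sketch of that pattern, with illustrative field names:

from marshmallow import Schema, fields

# hypothetical field map; in the codebase this is built from stored contract metadata
dynamic_fields = {
    "user_id": fields.Str(required=True),
    "score": fields.Float(load_default=0.0),
}
DynamicSchema = Schema.from_dict(dynamic_fields)
print(DynamicSchema().load({"user_id": "abc", "score": "0.7"}))
# -> {'user_id': 'abc', 'score': 0.7}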

View file

@@ -102,54 +102,9 @@ class WeaviateVectorDB(VectorDB):
)
return client
# def _document_loader(self, observation: str, loader_settings: dict):
# # Check the format of the document
# document_format = loader_settings.get("format", "text")
#
# if document_format == "PDF":
# if loader_settings.get("source") == "url":
# pdf_response = requests.get(loader_settings["path"])
# pdf_stream = BytesIO(pdf_response.content)
# contents = pdf_stream.read()
# tmp_location = os.path.join("/tmp", "tmp.pdf")
# with open(tmp_location, "wb") as tmp_file:
# tmp_file.write(contents)
#
# # Process the PDF using PyPDFLoader
# loader = PyPDFLoader(tmp_location)
# # adapt this for different chunking strategies
# pages = loader.load_and_split()
# return pages
# elif loader_settings.get("source") == "file":
# # Process the PDF using PyPDFLoader
# # might need adapting for different loaders + OCR
# # need to test the path
# loader = PyPDFLoader(loader_settings["path"])
# pages = loader.load_and_split()
# return pages
#
# elif document_format == "text":
# # Process the text directly
# return observation
#
# else:
# raise ValueError(f"Unsupported document format: {document_format}")
def _stuct(self, observation, params, metadata_schema_class =None):
"""Utility function to create the document structure with optional custom fields."""
# Dynamically construct metadata
# metadata = {
# key: str(getattr(self, key, params.get(key, "")))
# for key in [
# "user_id", "memory_id", "ltm_memory_id",
# "st_memory_id", "buffer_id", "version",
# "agreement_id", "privacy_policy", "terms_of_service",
# "format", "schema_version", "checksum",
# "owner", "license", "validity_start", "validity_end"
# ]
# }
# # Merge with custom fields if provided
# if custom_fields:
# metadata.update(custom_fields)
# Construct document data
document_data = {