diff --git a/level_3/.dlt/secrets.toml.example b/level_3/.dlt/secrets.toml.example
new file mode 100644
index 000000000..5ef194241
--- /dev/null
+++ b/level_3/.dlt/secrets.toml.example
@@ -0,0 +1,6 @@
+[destination.weaviate.credentials]
+url = "https://your-weaviate-url"
+api_key = "your-weaviate-api-key"
+
+[destination.weaviate.credentials.additional_headers]
+X-OpenAI-Api-Key = "your-openai-api-key"
\ No newline at end of file
diff --git a/level_3/database/database_crud.py b/level_3/database/database_crud.py
new file mode 100644
index 000000000..74eee3a34
--- /dev/null
+++ b/level_3/database/database_crud.py
@@ -0,0 +1,18 @@
+from contextlib import contextmanager
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+@contextmanager
+def session_scope(session):
+    """Provide a transactional scope around a series of operations."""
+    try:
+        yield session
+        session.commit()
+    except Exception as e:
+        session.rollback()
+        logger.error(f"Session rollback due to: {str(e)}")
+        raise
+    finally:
+        session.close()
\ No newline at end of file
diff --git a/level_3/models/user.py b/level_3/models/user.py
index 58a548010..d9514c3cf 100644
--- a/level_3/models/user.py
+++ b/level_3/models/user.py
@@ -12,8 +12,7 @@ from database.database import Base
 class User(Base):
     __tablename__ = 'users'
 
-    id = Column(String, primary_key=True)
-    name = Column(String, nullable=False, unique=True, index=True)
+    id = Column(String, primary_key=True, index=True)
     session_id = Column(String, nullable=True, unique=True)
     created_at = Column(DateTime, default=datetime.utcnow)
     updated_at = Column(DateTime, onupdate=datetime.utcnow)
@@ -27,4 +26,4 @@ class User(Base):
     metadatas = relationship("MetaDatas", back_populates="user")
 
     def __repr__(self):
-        return f"<User(id={self.id}, name={self.name})>"
+        return f"<User(id={self.id})>"
diff --git a/level_3/poetry.lock b/level_3/poetry.lock
index 389c1d38b..63783ea81 100644
--- a/level_3/poetry.lock
+++ b/level_3/poetry.lock
@@ -685,13 +685,13 @@ pdf = ["pypdf (>=3.3.0,<4.0.0)"]
 
 [[package]]
 name = "deepeval"
-version = "0.20.0"
+version = "0.20.1"
 description = "DeepEval provides evaluation and unit testing to accelerate development of LLMs and Agents."
 optional = false
 python-versions = "*"
 files = [
-    {file = "deepeval-0.20.0-py3-none-any.whl", hash = "sha256:81b73d0742974b6ee516c26d8235f3aa62dca765893d41f0eddd870507f70373"},
-    {file = "deepeval-0.20.0.tar.gz", hash = "sha256:0e7ec2bbe69b03f9b5f21e5b285559363c2a84a9df25b1f7a278091f31fe7049"},
+    {file = "deepeval-0.20.1-py3-none-any.whl", hash = "sha256:f9880a1246a2a8ba77d88b1d2f977759d34741df6d584bb3c55fadc95c52bc89"},
+    {file = "deepeval-0.20.1.tar.gz", hash = "sha256:e3e36745f5e77bc6055def0b98e7a3274c87564f498f50337b670a291fde32a5"},
 ]
 
 [package.dependencies]
@@ -705,7 +705,7 @@ sentence-transformers = "*"
 tabulate = "*"
 tqdm = "*"
 transformers = "*"
-typer = "*"
+typer = "0.9.0"
 
 [package.extras]
 bias = ["Dbias", "tensorflow"]
@@ -731,13 +731,13 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"]
 
 [[package]]
 name = "dlt"
-version = "0.3.14"
+version = "0.3.18"
 description = "DLT is an open-source python-native scalable data loading framework that does not require any devops efforts to run."
 optional = false
 python-versions = ">=3.8.1,<4.0"
 files = [
-    {file = "dlt-0.3.14-py3-none-any.whl", hash = "sha256:b7672e153065796d0e7b0bc7eacfc48feff32a28e091eeca30f5a7180e42da2c"},
-    {file = "dlt-0.3.14.tar.gz", hash = "sha256:b398ee07a1b87a6ac93130fc8e143d77e99a30d1bf957468d0252f23f563c01e"},
+    {file = "dlt-0.3.18-py3-none-any.whl", hash = "sha256:4ce792fc36382ae5d8bbcbd9510c2cfd4a312149da6ad94e099dcd2c22eb052a"},
+    {file = "dlt-0.3.18.tar.gz", hash = "sha256:dd7f64c918aebd02f68c4a7a642566821c239829ac35ba7c2a3d19729e4402d3"},
 ]
 
 [package.dependencies]
@@ -750,7 +750,6 @@ gitpython = ">=3.1.29"
 giturlparse = ">=0.10.0"
 hexbytes = ">=0.2.2"
 humanize = ">=4.4.0"
-json-logging = "1.4.1rc0"
 jsonpath-ng = ">=1.5.3"
 makefun = ">=1.15.0"
 orjson = {version = ">=3.6.7", markers = "platform_python_implementation != \"PyPy\""}
@@ -771,20 +770,22 @@ typing-extensions = ">=4.0.0"
 tzdata = ">=2022.1"
 
 [package.extras]
-athena = ["boto3 (>=1.25)", "pyarrow (>=8.0.0)", "pyathena (>=2.9.6)", "s3fs (>=2022.4.0)"]
+athena = ["botocore (>=1.28)", "pyarrow (>=8.0.0)", "pyathena (>=2.9.6)", "s3fs (>=2022.4.0)"]
+az = ["adlfs (>=2022.4.0)"]
 bigquery = ["gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (>=1.50.0)", "pyarrow (>=8.0.0)"]
 cli = ["cron-descriptor (>=1.2.32)", "pipdeptree (>=2.9.0,<2.10)"]
 dbt = ["dbt-athena-community (>=1.2.0)", "dbt-bigquery (>=1.2.0)", "dbt-core (>=1.2.0)", "dbt-duckdb (>=1.2.0)", "dbt-redshift (>=1.2.0)", "dbt-snowflake (>=1.2.0)"]
 duckdb = ["duckdb (>=0.6.1,<0.9.0)"]
-filesystem = ["boto3 (>=1.25)", "s3fs (>=2022.4.0)"]
+filesystem = ["botocore (>=1.28)", "s3fs (>=2022.4.0)"]
 gcp = ["gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (>=1.50.0)"]
 gs = ["gcsfs (>=2022.4.0)"]
 motherduck = ["duckdb (>=0.6.1,<0.9.0)", "pyarrow (>=8.0.0)"]
+mssql = ["pyodbc (>=4.0.39,<5.0.0)"]
 parquet = ["pyarrow (>=8.0.0)"]
 postgres = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0)"]
 pydantic = ["pydantic (>=1.10,<2.0)"]
 redshift = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0)"]
-s3 = ["boto3 (>=1.25)", "s3fs (>=2022.4.0)"]
+s3 = ["botocore (>=1.28)", "s3fs (>=2022.4.0)"]
 snowflake = ["snowflake-connector-python[pandas] (>=2.9.0)"]
 weaviate = ["weaviate-client (>=3.22)"]
 
@@ -1419,17 +1420,6 @@ files = [
     {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"},
 ]
 
-[[package]]
-name = "json-logging"
-version = "1.4.1rc0"
-description = "JSON Python Logging"
-optional = false
-python-versions = "*"
-files = [
-    {file = "json-logging-1.4.1rc0.tar.gz", hash = "sha256:381e00495bbd619d09c8c3d1fdd72c843f7045797ab63b42cfec5f7961e5b3f6"},
-    {file = "json_logging-1.4.1rc0-py2.py3-none-any.whl", hash = "sha256:2b787c28f31fb4d8aabac16ac3816326031d92dd054bdabc9bbe68eb10864f77"},
-]
-
 [[package]]
 name = "jsonpatch"
 version = "1.33"
@@ -1474,13 +1464,13 @@ files = [
 
 [[package]]
 name = "langchain"
-version = "0.0.303"
+version = "0.0.308"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = ">=3.8.1,<4.0"
 files = [
-    {file = "langchain-0.0.303-py3-none-any.whl", hash = "sha256:1745961f66b60bc3b513820a34c560dd37c4ba4b7499ba82545dc4816d0133bd"},
-    {file = "langchain-0.0.303.tar.gz", hash = "sha256:84d2727eb8b3b27a9d0aa0da9f05408c2564a4a923c7d5b154a16e488430e725"},
+    {file = "langchain-0.0.308-py3-none-any.whl", hash = "sha256:807de0a8f4177e42e435682cfd33e600518d04e1688149afda8542b9d31a407f"},
+    {file = "langchain-0.0.308.tar.gz", hash = "sha256:496ddef6c0aa8e73b3c28bad8c4cb02cdb7330e8ba80b238f1b3e0d663756b1b"},
 ]
 
 [package.dependencies]
@@ -1489,8 +1479,7 @@ anyio = "<4.0"
 async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
 dataclasses-json = ">=0.5.7,<0.7"
 jsonpatch = ">=1.33,<2.0"
-langsmith = ">=0.0.38,<0.1.0"
-numexpr = ">=2.8.4,<3.0.0"
+langsmith = ">=0.0.40,<0.1.0"
 numpy = ">=1,<2"
 pydantic = ">=1,<3"
 PyYAML = ">=5.3"
@@ -1499,16 +1488,16 @@ SQLAlchemy = ">=1.4,<3"
 tenacity = ">=8.1.0,<9.0.0"
 
 [package.extras]
-all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
+all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.6.8,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "libdeeplake (>=0.0.60,<0.0.61)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
 azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (>=0,<1)"]
 clarifai = ["clarifai (>=9.1.0)"]
 cohere = ["cohere (>=4,<5)"]
 docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
 embeddings = ["sentence-transformers (>=2,<3)"]
-extended-testing = ["amazon-textract-caller (<2)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
+extended-testing = ["amazon-textract-caller (<2)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (>=0,<1)", "openapi-schema-pydantic (>=1.2,<2.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
 javascript = ["esprima (>=4.0.1,<5.0.0)"]
 llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
-openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"]
+openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.6.0)"]
 qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"]
 text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
@@ -1982,48 +1971,6 @@ plot = ["matplotlib"]
 tgrep = ["pyparsing"]
 twitter = ["twython"]
 
-[[package]]
-name = "numexpr"
-version = "2.8.5"
-description = "Fast numerical expression evaluator for NumPy"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "numexpr-2.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51f3ab160c3847ebcca93cd88f935a7802b54a01ab63fe93152994a64d7a6cf2"},
-    {file = "numexpr-2.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:de29c77f674e4eb8f0846525a475cab64008c227c8bc4ba5153ab3f72441cc63"},
-    {file = "numexpr-2.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf85ba1327eb87ec82ae7936f13c8850fb969a0ca34f3ba9fa3897c09d5c80d7"},
-    {file = "numexpr-2.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c00be69f747f44a631830215cab482f0f77f75af2925695adff57c1cc0f9a68"},
-    {file = "numexpr-2.8.5-cp310-cp310-win32.whl", hash = "sha256:c46350dcdb93e32f033eea5a21269514ffcaf501d9abd6036992d37e48a308b0"},
-    {file = "numexpr-2.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:894b027438b8ec88dea32a19193716c79f4ff8ddb92302dcc9731b51ba3565a8"},
-    {file = "numexpr-2.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6df184d40d4cf9f21c71f429962f39332f7398147762588c9f3a5c77065d0c06"},
-    {file = "numexpr-2.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:178b85ad373c6903e55d75787d61b92380439b70d94b001cb055a501b0821335"},
-    {file = "numexpr-2.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:578fe4008e4d5d6ff01bbeb2d7b7ba1ec658a5cda9c720cd26a9a8325f8ef438"},
-    {file = "numexpr-2.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef621b4ee366a5c6a484f6678c9259f5b826569f8bfa0b89ba2306d5055468bb"},
-    {file = "numexpr-2.8.5-cp311-cp311-win32.whl", hash = "sha256:dd57ab1a3d3aaa9274aff1cefbf93b8ddacc7973afef5b125905f6bf18fabab0"},
-    {file = "numexpr-2.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:783324ba40eb804ecfc9ebae86120a1e339ab112d0ab8a1f0d48a26354d5bf9b"},
-    {file = "numexpr-2.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:183d5430db76826e54465c69db93a3c6ecbf03cda5aa1bb96eaad0147e9b68dc"},
-    {file = "numexpr-2.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39ce106f92ccea5b07b1d6f2f3c4370f05edf27691dc720a63903484a2137e48"},
-    {file = "numexpr-2.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b594dc9e2d6291a0bc5c065e6d9caf3eee743b5663897832e9b17753c002947a"},
-    {file = "numexpr-2.8.5-cp37-cp37m-win32.whl", hash = "sha256:62b4faf8e0627673b0210a837792bddd23050ecebc98069ab23eb0633ff1ef5f"},
-    {file = "numexpr-2.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:db5c65417d69414f1ab31302ea01d3548303ef31209c38b4849d145be4e1d1ba"},
-    {file = "numexpr-2.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eb36ffcfa1606e41aa08d559b4277bcad0e16b83941d1a4fee8d2bd5a34f8e0e"},
-    {file = "numexpr-2.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:34af2a0e857d02a4bc5758bc037a777d50dacb13bcd57c7905268a3e44994ed6"},
-    {file = "numexpr-2.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a8dad2bfaad5a5c34a2e8bbf62b9df1dfab266d345fda1feb20ff4e264b347a"},
-    {file = "numexpr-2.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b93f5a866cd13a808bc3d3a9c487d94cd02eec408b275ff0aa150f2e8e5191f8"},
-    {file = "numexpr-2.8.5-cp38-cp38-win32.whl", hash = "sha256:558390fea6370003ac749ed9d0f38d708aa096f5dcb707ddb6e0ca5a0dd37da1"},
-    {file = "numexpr-2.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:55983806815035eb63c5039520688c49536bb7f3cc3fc1d7d64c6a00cf3f353e"},
-    {file = "numexpr-2.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1510da20e6f5f45333610b1ded44c566e2690c6c437c84f2a212ca09627c7e01"},
-    {file = "numexpr-2.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e8b5bf7bcb4e8dcd66522d8fc96e1db7278f901cb4fd2e155efbe62a41dde08"},
-    {file = "numexpr-2.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ed0e1c1ef5f34381448539f1fe9015906d21c9cfa2797c06194d4207dadb465"},
-    {file = "numexpr-2.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aea6ab45c87c0a7041183c08a798f0ad4d7c5eccbce20cfe79ce6f1a45ef3702"},
-    {file = "numexpr-2.8.5-cp39-cp39-win32.whl", hash = "sha256:cbfd833ee5fdb0efb862e152aee7e6ccea9c596d5c11d22604c2e6307bff7cad"},
-    {file = "numexpr-2.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:283ce8609a7ccbadf91a68f3484558b3e36d27c93c98a41ec205efb0ab43c872"},
-    {file = "numexpr-2.8.5.tar.gz", hash = "sha256:45ed41e55a0abcecf3d711481e12a5fb7a904fe99d42bc282a17cc5f8ea510be"},
-]
-
-[package.dependencies]
-numpy = ">=1.13.3"
-
 [[package]]
 name = "numpy"
 version = "1.25.2"
@@ -4831,4 +4778,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "90d5e6dc901e0ee6324452b378fd92b03872c4d63968379743247224c217a12c"
+content-hash = "cf14b96576c57633fea1b0cc1f7266a5fb265d4ff4ce2d3479a036483082b3b4"
diff --git a/level_3/pyproject.toml b/level_3/pyproject.toml
index 6e849f244..fc7d694ea 100644
--- a/level_3/pyproject.toml
+++ b/level_3/pyproject.toml
@@ -8,7 +8,7 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.10"
 #langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"}
-langchain = "v0.0.303"
+langchain = "v0.0.308"
 nltk = "3.8.1"
 openai = "0.27.8"
diff --git a/level_3/rag_test_manager.py b/level_3/rag_test_manager.py
index fb04b6141..f30dffb11 100644
--- a/level_3/rag_test_manager.py
+++ b/level_3/rag_test_manager.py
@@ -145,7 +145,8 @@ def generate_param_variants(base_params=None, increments=None, ranges=None, incl
         'chunk_size': 500,
         'chunk_overlap': 20,
         'similarity_score': 0.5,
-        'metadata_variation': 0
+        'metadata_variation': 0,
+        'search_type': 'hybrid'
     }
 
     # Update defaults with provided base parameters
@@ -178,8 +179,10 @@
         for key in ['chunk_size', 'chunk_overlap', 'similarity_score', 'metadata_variation']
     }
 
+
     param_ranges['cognitive_architecture'] = ["simple_index", "cognitive_architecture"]
-    param_ranges['search_strategy'] = ["similarity_score", "fusion_score"]
+    # Add search_type with possible values
+    param_ranges['search_type'] = ['text', 'hybrid', 'bm25', 'generate', 'generate_grouped']
 
     # Filter param_ranges based on included_params
     if included_params is not None:
@@ -279,8 +282,8 @@ def fetch_test_set_id(session, user_id, id):
         return (
             session.query(TestSet.id)
            .filter_by(user_id=user_id, id=id)
-            .order_by(TestSet.created_at.desc())
-            .first()
+            .order_by(TestSet.created_at)
+            .first()
         )
     except Exception as e:
         logger.error(f"An error occurred while retrieving the job: {str(e)}")
@@ -292,6 +295,9 @@
     Session = sessionmaker(bind=engine)
     session = Session()
 
+
+    memory = Memory.create_memory(user_id, session, namespace="SEMANTICMEMORY")
+
     job_id = fetch_job_id(session, user_id = user_id,job_id =job_id)
     test_set_id = fetch_test_set_id(session, user_id=user_id, id=job_id)
     if job_id is None:
@@ -321,13 +327,6 @@ async def start_test(data, test_set=None, user_id=None, params=None, job_id=None
 
     for test in test_params:
         test_id = str(generate_letter_uuid()) + "_" + "SEMANTICEMEMORY"
-
-        #handle test data here
-
-        Session = sessionmaker(bind=engine)
-        session = Session()
-        memory = Memory.create_memory(user_id, session, namespace=test_id)
-
         # Adding a memory instance
         memory.add_memory_instance("ExampleMemory")
 
@@ -379,10 +378,8 @@ async def start_test(data, test_set=None, user_id=None, params=None, job_id=None
                 memory.add_method_to_class(dynamic_memory_class, 'delete_memories')
             else:
                 print(f"No attribute named {test_class.lower()} in memory.")
-        load_action = await memory.dynamic_method_call(dynamic_memory_class, 'delete_memories',
-                                                       namespace ='some_observation', params=metadata,
-                                                       loader_settings=loader_settings)
-        memory.delete_memories(namespace=test_id)
+        delete_mems = await memory.dynamic_method_call(dynamic_memory_class, 'delete_memories',
+                                                       namespace =test_id)
 
    print(test_result_collection)
 
diff --git a/level_3/vectordb/basevectordb.py b/level_3/vectordb/basevectordb.py
index a764c280b..d7c6332e3 100644
--- a/level_3/vectordb/basevectordb.py
+++ b/level_3/vectordb/basevectordb.py
@@ -56,6 +56,7 @@ class VectorDBFactory:
         buffer_id: str = BUFFER_ID_DEFAULT,
         db_type: str = "pinecone",
         namespace: str = None,
+        embeddings = None,
     ):
 
         db_map = {"pinecone": PineconeVectorDB, "weaviate": WeaviateVectorDB}
@@ -68,6 +69,7 @@
                 st_memory_id,
                 buffer_id,
                 namespace,
+                embeddings
             )
 
         raise ValueError(f"Unsupported database type: {db_type}")
@@ -80,11 +82,13 @@ class BaseMemory:
         index_name: Optional[str],
         db_type: str,
         namespace: str,
+        embeddings: Optional[None],
     ):
         self.user_id = user_id
         self.memory_id = memory_id
         self.index_name = index_name
         self.namespace = namespace
+        self.embeddings = embeddings
         self.db_type = db_type
         factory = VectorDBFactory()
         self.vector_db = factory.create_vector_db(
@@ -93,11 +97,12 @@
             self.memory_id,
             db_type=self.db_type,
             namespace=self.namespace,
+            embeddings=self.embeddings
         )
 
-    def init_client(self, namespace: str):
+    def init_client(self, embeddings, namespace: str):
 
-        return self.vector_db.init_weaviate_client(namespace)
+        return self.vector_db.init_weaviate_client(embeddings, namespace)
 
     def create_field(self, field_type, **kwargs):
         field_mapping = {
@@ -174,6 +179,7 @@
         params: Optional[dict] = None,
         namespace: Optional[str] = None,
         custom_fields: Optional[str] = None,
+        embeddings: Optional[str] = None,
     ):
 
         from ast import literal_eval
@@ -209,24 +215,25 @@
             return await self.vector_db.add_memories(
                 observation=observation, loader_settings=loader_settings,
-                params=loaded_params, namespace=namespace, metadata_schema_class = schema_instance
+                params=loaded_params, namespace=namespace, metadata_schema_class = schema_instance, embeddings=embeddings
             )
         # Add other db_type conditions if necessary
 
 
     async def fetch_memories(
         self,
         observation: str,
+        search_type: Optional[str] = None,
         params: Optional[str] = None,
         namespace: Optional[str] = None,
         n_of_observations: Optional[int] = 2,
     ):
 
 
         return await self.vector_db.fetch_memories(
-            observation=observation, params=params,
+            observation=observation, search_type= search_type, params=params,
             namespace=namespace, n_of_observations=n_of_observations
         )
 
 
-    async def delete_memories(self, params: Optional[str] = None):
-        return await self.vector_db.delete_memories(params)
+    async def delete_memories(self, namespace:str, params: Optional[str] = None):
+        return await self.vector_db.delete_memories(namespace,params)
diff --git a/level_3/vectordb/vectordb.py b/level_3/vectordb/vectordb.py
index 4bc0b32ee..493de1c5e 100644
--- a/level_3/vectordb/vectordb.py
+++ b/level_3/vectordb/vectordb.py
@@ -36,6 +36,7 @@ class VectorDB:
         st_memory_id: str = ST_MEMORY_ID_DEFAULT,
         buffer_id: str = BUFFER_ID_DEFAULT,
         namespace: str = None,
+        embeddings = None,
     ):
         self.user_id = user_id
         self.index_name = index_name
@@ -44,6 +45,7 @@
         self.ltm_memory_id = ltm_memory_id
         self.st_memory_id = st_memory_id
         self.buffer_id = buffer_id
+        self.embeddings = embeddings
 
 class PineconeVectorDB(VectorDB):
     def __init__(self, *args, **kwargs):
@@ -54,15 +56,15 @@
         # Pinecone initialization logic
         pass
 
-
+import langchain.embeddings
 class WeaviateVectorDB(VectorDB):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.init_weaviate(self.namespace)
+        self.init_weaviate(embeddings= self.embeddings, namespace = self.namespace)
 
-    def init_weaviate(self, namespace: str):
+    def init_weaviate(self, embeddings =OpenAIEmbeddings() , namespace: str=None):
         # Weaviate initialization logic
-        embeddings = OpenAIEmbeddings()
+        # embeddings = OpenAIEmbeddings()
         auth_config = weaviate.auth.AuthApiKey(
             api_key=os.environ.get("WEAVIATE_API_KEY")
         )
@@ -112,11 +114,11 @@
             CurrentDocumentSchema = get_document_schema()
             loaded_document = CurrentDocumentSchema().load(document_data)
             return [loaded_document]
-    async def add_memories(self, observation, loader_settings=None, params=None, namespace=None, metadata_schema_class=None):
+    async def add_memories(self, observation, loader_settings=None, params=None, namespace=None, metadata_schema_class=None, embeddings = 'hybrid'):
         # Update Weaviate memories here
         if namespace is None:
             namespace = self.namespace
-        retriever = self.init_weaviate(namespace)
+        retriever = self.init_weaviate(embeddings=embeddings,namespace = namespace)
         if loader_settings:
             # Assuming _document_loader returns a list of documents
             documents = _document_loader(observation, loader_settings)
@@ -133,25 +135,26 @@
             retriever.add_documents([
                 Document(metadata=document_to_load[0]['metadata'], page_content=document_to_load[0]['page_content'])])
 
-    async def fetch_memories(
-        self, observation: str, namespace: str, params: dict = None, n_of_observations: int = 2
-    ):
+    async def fetch_memories(self, observation: str, namespace: str = None, search_type: str = 'hybrid', **kwargs):
         """
         Fetch documents from weaviate.
 
         Parameters:
         - observation (str): User query.
-        - namespace (str): Type of memory accessed.
-        - params (dict, optional): Filtering parameters.
-        - n_of_observations (int, optional): For weaviate, equals to autocut. Defaults to 2. Ranges from 1 to 3.
+        - namespace (str, optional): Type of memory accessed.
+        - search_type (str, optional): Type of search ('text', 'hybrid', 'bm25', 'generate', 'generate_grouped'). Defaults to 'hybrid'.
+        - **kwargs: Additional parameters for flexibility.
 
         Returns:
-        List of documents matching the query.
+        List of documents matching the query or an empty list in case of error.
 
         Example:
-        fetch_memories(query="some query", path=['year'], operator='Equal', valueText='2017*')
+        fetch_memories(query="some query", search_type='text', additional_param='value')
         """
         client = self.init_weaviate_client(self.namespace)
 
+        if search_type is None:
+            search_type = 'hybrid'
+
         if not namespace:
             namespace = self.namespace
@@ -176,23 +179,51 @@
             ["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", 'distance']
         ).with_where(params_user_id).with_limit(10)
 
-        if params:
-            query_output = (
-                base_query
-                .with_where(params)
-                .with_near_text({"concepts": [observation]})
-                .do()
-            )
-        else:
-            query_output = (
-                base_query
-                .with_hybrid(
-                    query=observation,
-                    fusion_type=HybridFusion.RELATIVE_SCORE
+        try:
+            if search_type == 'text':
+                query_output = (
+                    base_query
+                    .with_near_text({"concepts": [observation]})
+                    .do()
                 )
-                .with_autocut(n_of_observations)
-                .do()
-            )
+            elif search_type == 'hybrid':
+                n_of_observations = kwargs.get('n_of_observations', 2)
+
+
+                query_output = (
+                    base_query
+                    .with_hybrid(query=observation, fusion_type=HybridFusion.RELATIVE_SCORE)
+                    .with_autocut(n_of_observations)
+                    .do()
+                )
+            elif search_type == 'bm25':
+                query_output = (
+                    base_query
+                    .with_bm25(query=observation)
+                    .do()
+                )
+            elif search_type == 'generate':
+                generate_prompt = kwargs.get('generate_prompt', "")
+                query_output = (
+                    base_query
+                    .with_generate(single_prompt=generate_prompt)
+                    .with_near_text({"concepts": [observation]})
+                    .do()
+                )
+            elif search_type == 'generate_grouped':
+                generate_prompt = kwargs.get('generate_prompt', "")
+                query_output = (
+                    base_query
+                    .with_generate(grouped_task=generate_prompt)
+                    .with_near_text({"concepts": [observation]})
+                    .do()
+                )
+            else:
+                logging.error(f"Invalid search_type: {search_type}")
+                return []
+        except Exception as e:
+            logging.error(f"Error executing query: {str(e)}")
+            return []
 
 
         return query_output
diff --git a/level_3/vectorstore_manager.py b/level_3/vectorstore_manager.py
index 578bed883..9a7bb7c18 100644
--- a/level_3/vectorstore_manager.py
+++ b/level_3/vectorstore_manager.py
@@ -30,14 +30,24 @@ from vectordb.basevectordb import BaseMemory
 
 
 class DynamicBaseMemory(BaseMemory):
-    def __init__(self, name, user_id, memory_id, index_name, db_type, namespace):
-        super().__init__(user_id, memory_id, index_name, db_type, namespace)
+    def __init__(
+        self,
+        name: str,
+        user_id: str,
+        memory_id: str,
+        index_name: str,
+        db_type: str,
+        namespace: str,
+        embeddings=None
+    ):
+        super().__init__(user_id, memory_id, index_name, db_type, namespace, embeddings)
         self.name = name
         self.attributes = set()
         self.methods = set()
         self.inheritance = None
         self.associations = []
 
+
     def add_method(self, method_name):
         """
         Add a method to the memory class.
@@ -178,7 +188,7 @@
     def handle_new_user(user_id: str, session):
         """Handle new user creation in the DB and return the new memory ID."""
         memory_id = str(uuid.uuid4())
-        new_user = User(id=user_id, name="john doe")
+        new_user = User(id=user_id)
 
         session.add(new_user)
         session.commit()
@@ -310,9 +320,9 @@ async def main():
 
     sss = await memory.dynamic_method_call(memory.semanticmemory_class, 'add_memories',
                                            observation='some_observation', params=params)
-    # susu = await memory.dynamic_method_call(memory.semanticmemory_class, 'fetch_memories',
-    #                                         observation='some_observation')
-    # print(susu)
+    susu = await memory.dynamic_method_call(memory.semanticmemory_class, 'fetch_memories',
+                                            observation='some_observation')
+    print(susu)
 
     # Adding a dynamic memory class
     # dynamic_memory = memory.add_dynamic_memory_class("DynamicMemory", "ExampleNamespace")
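
Usage sketch for reviewers (not part of the patch): a minimal example of how the session_scope helper added in database_crud.py and the search_type parameter now threaded through fetch_memories might be exercised together. The SQLite engine URL and the demo wiring are illustrative assumptions inferred from the calls visible in the diff, not confirmed project API.

import asyncio

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from database.database_crud import session_scope
from vectorstore_manager import Memory

# Assumed engine just for this sketch; the project configures its own.
engine = create_engine("sqlite:///test.db")
Session = sessionmaker(bind=engine)


async def demo(user_id: str) -> None:
    # session_scope commits on success, rolls back and re-raises on failure,
    # and always closes the session.
    with session_scope(Session()) as session:
        memory = Memory.create_memory(user_id, session, namespace="SEMANTICMEMORY")

    # fetch_memories now dispatches on search_type:
    # 'text', 'hybrid' (default), 'bm25', 'generate', 'generate_grouped'.
    results = await memory.dynamic_method_call(
        memory.semanticmemory_class,
        "fetch_memories",
        observation="some_observation",
        search_type="bm25",
    )
    print(results)


asyncio.run(demo("test_user"))

Note that per the error handling added in vectordb.py, an unrecognized search_type is logged and yields an empty list rather than raising.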