diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 72f15e0da..e47dee417 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -30,6 +30,10 @@ on: required: true GRAPHISTRY_PASSWORD: required: true + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true jobs: run-server-start-test: @@ -157,3 +161,30 @@ jobs: EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: poetry run python ./cognee/tests/test_deduplication.py + + run-s3-bucket-test: + name: S3 Bucket Test + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Run S3 Bucket Test + env: + ENV: 'dev' + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + run: poetry run python ./cognee/tests/test_s3.py diff --git a/cognee/api/v1/add/config.py b/cognee/api/v1/add/config.py new file mode 100644 index 000000000..dc08e3333 --- /dev/null +++ b/cognee/api/v1/add/config.py @@ -0,0 +1,14 @@ +from typing import Optional +from functools import lru_cache +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class S3Config(BaseSettings): + aws_access_key_id: Optional[str] = None + aws_secret_access_key: Optional[str] = None + model_config = SettingsConfigDict(env_file=".env", extra="allow") + + +@lru_cache +def get_s3_config(): + return S3Config() diff --git a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py index 072ef9ac4..26b98df08 100644 --- a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py @@ -9,7 +9,7 @@ import aiohttp.http_exceptions from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException from cognee.infrastructure.llm.tokenizer.HuggingFace import HuggingFaceTokenizer -from cognee.infrastructure.llm.rate_limiter import ( +from cognee.infrastructure.llm.embedding_rate_limiter import ( embedding_rate_limit_async, embedding_sleep_and_retry_async, ) diff --git a/cognee/infrastructure/files/utils/get_file_metadata.py b/cognee/infrastructure/files/utils/get_file_metadata.py index e5d884c60..aea34c04c 100644 --- a/cognee/infrastructure/files/utils/get_file_metadata.py +++ b/cognee/infrastructure/files/utils/get_file_metadata.py @@ -20,7 +20,7 @@ def get_file_metadata(file: BinaryIO) -> FileMetadata: file_type = guess_file_type(file) - file_path = file.name + file_path = getattr(file, "name", None) or getattr(file, "full_name", None) file_name = str(file_path).split("/")[-1].split(".")[0] if file_path else None return FileMetadata( diff --git a/cognee/infrastructure/llm/openai/adapter.py b/cognee/infrastructure/llm/openai/adapter.py index 881e2db62..417af85df 100644 --- a/cognee/infrastructure/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/openai/adapter.py @@ -6,6 +6,8 @@ from typing import Type import litellm import instructor from pydantic import BaseModel + +from cognee.modules.data.processing.document_types.open_data_file import open_data_file from cognee.shared.data_models import MonitoringTool from cognee.exceptions import InvalidValueError from cognee.infrastructure.llm.llm_interface import LLMInterface @@ -114,26 +116,24 @@ class OpenAIAdapter(LLMInterface): def create_transcript(self, input): """Generate a audio transcript from a user query.""" - if not os.path.isfile(input): + if not input.startswith("s3://") and not os.path.isfile(input): raise FileNotFoundError(f"The file {input} does not exist.") - # with open(input, 'rb') as audio_file: - # audio_data = audio_file.read() - - transcription = litellm.transcription( - model=self.transcription_model, - file=Path(input), - api_key=self.api_key, - api_base=self.endpoint, - api_version=self.api_version, - max_retries=self.MAX_RETRIES, - ) + with open_data_file(input, mode="rb") as audio_file: + transcription = litellm.transcription( + model=self.transcription_model, + file=audio_file, + api_key=self.api_key, + api_base=self.endpoint, + api_version=self.api_version, + max_retries=self.MAX_RETRIES, + ) return transcription @rate_limit_sync def transcribe_image(self, input) -> BaseModel: - with open(input, "rb") as image_file: + with open_data_file(input, mode="rb") as image_file: encoded_image = base64.b64encode(image_file.read()).decode("utf-8") return litellm.completion( diff --git a/cognee/modules/data/processing/document_types/PdfDocument.py b/cognee/modules/data/processing/document_types/PdfDocument.py index 12b919b4a..7ddbc0885 100644 --- a/cognee/modules/data/processing/document_types/PdfDocument.py +++ b/cognee/modules/data/processing/document_types/PdfDocument.py @@ -1,6 +1,6 @@ from pypdf import PdfReader from cognee.modules.chunking.Chunker import Chunker - +from .open_data_file import open_data_file from .Document import Document @@ -8,15 +8,14 @@ class PdfDocument(Document): type: str = "pdf" def read(self, chunker_cls: Chunker, max_chunk_size: int): - file = PdfReader(self.raw_data_location) + with open_data_file(self.raw_data_location, mode="rb") as stream: + file = PdfReader(stream) - def get_text(): - for page in file.pages: - page_text = page.extract_text() - yield page_text + def get_text(): + for page in file.pages: + page_text = page.extract_text() + yield page_text - chunker = chunker_cls(self, get_text=get_text, max_chunk_size=max_chunk_size) + chunker = chunker_cls(self, get_text=get_text, max_chunk_size=max_chunk_size) - yield from chunker.read() - - file.stream.close() + yield from chunker.read() diff --git a/cognee/modules/data/processing/document_types/TextDocument.py b/cognee/modules/data/processing/document_types/TextDocument.py index 8a65ec05b..db6b6a842 100644 --- a/cognee/modules/data/processing/document_types/TextDocument.py +++ b/cognee/modules/data/processing/document_types/TextDocument.py @@ -1,5 +1,6 @@ from .Document import Document from cognee.modules.chunking.Chunker import Chunker +from .open_data_file import open_data_file class TextDocument(Document): @@ -7,15 +8,12 @@ class TextDocument(Document): def read(self, chunker_cls: Chunker, max_chunk_size: int): def get_text(): - with open(self.raw_data_location, mode="r", encoding="utf-8") as file: + with open_data_file(self.raw_data_location, mode="r", encoding="utf-8") as file: while True: text = file.read(1000000) - - if len(text.strip()) == 0: + if not text.strip(): break - yield text chunker = chunker_cls(self, max_chunk_size=max_chunk_size, get_text=get_text) - yield from chunker.read() diff --git a/cognee/modules/data/processing/document_types/UnstructuredDocument.py b/cognee/modules/data/processing/document_types/UnstructuredDocument.py index 731f558b4..b95b93fae 100644 --- a/cognee/modules/data/processing/document_types/UnstructuredDocument.py +++ b/cognee/modules/data/processing/document_types/UnstructuredDocument.py @@ -2,6 +2,7 @@ from io import StringIO from cognee.modules.chunking.Chunker import Chunker from cognee.modules.data.exceptions import UnstructuredLibraryImportError +from cognee.modules.data.processing.document_types.open_data_file import open_data_file from .Document import Document @@ -16,16 +17,19 @@ class UnstructuredDocument(Document): except ModuleNotFoundError: raise UnstructuredLibraryImportError - elements = partition(self.raw_data_location, content_type=self.mime_type) + if self.raw_data_location.startswith("s3://"): + with open_data_file(self.raw_data_location, mode="rb") as f: + elements = partition(file=f, content_type=self.mime_type) + else: + elements = partition(self.raw_data_location, content_type=self.mime_type) + in_memory_file = StringIO("\n\n".join([str(el) for el in elements])) in_memory_file.seek(0) while True: text = in_memory_file.read(1024) - - if len(text.strip()) == 0: + if not text.strip(): break - yield text chunker = chunker_cls(self, get_text=get_text, max_chunk_size=max_chunk_size) diff --git a/cognee/modules/data/processing/document_types/open_data_file.py b/cognee/modules/data/processing/document_types/open_data_file.py new file mode 100644 index 000000000..207b67fdb --- /dev/null +++ b/cognee/modules/data/processing/document_types/open_data_file.py @@ -0,0 +1,26 @@ +import s3fs +from typing import IO, Optional +from cognee.api.v1.add.config import get_s3_config + + +def open_data_file( + file_path: str, mode: str = "rb", encoding: Optional[str] = None, **kwargs +) -> IO: + if file_path.startswith("s3://"): + s3_config = get_s3_config() + if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: + fs = s3fs.S3FileSystem( + key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False + ) + else: + raise ValueError("S3 credentials are not set in the configuration.") + + if "b" in mode: + f = fs.open(file_path, mode=mode, **kwargs) + if not hasattr(f, "name") or not f.name: + f.name = file_path.split("/")[-1] + return f + else: + return fs.open(file_path, mode=mode, encoding=encoding, **kwargs) + else: + return open(file_path, mode=mode, encoding=encoding, **kwargs) diff --git a/cognee/modules/ingestion/classify.py b/cognee/modules/ingestion/classify.py index dd52df86a..5f3bbbd38 100644 --- a/cognee/modules/ingestion/classify.py +++ b/cognee/modules/ingestion/classify.py @@ -1,18 +1,22 @@ from io import BufferedReader -from typing import Union, BinaryIO -from .data_types import TextData, BinaryData +from typing import Union, BinaryIO, Optional +from .data_types import TextData, BinaryData, S3BinaryData from tempfile import SpooledTemporaryFile - +from s3fs.core import S3File, S3FileSystem from cognee.modules.ingestion.exceptions import IngestionError -def classify(data: Union[str, BinaryIO], filename: str = None): +def classify(data: Union[str, BinaryIO], filename: str = None, s3fs: Optional[S3FileSystem] = None): if isinstance(data, str): return TextData(data) if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile): return BinaryData(data, str(data.name).split("/")[-1] if data.name else filename) + if isinstance(data, S3File): + derived_filename = str(data.full_name).split("/")[-1] if data.full_name else filename + return S3BinaryData(s3_path=data.full_name, name=derived_filename, s3=s3fs) + raise IngestionError( message=f"Type of data sent to classify(data: Union[str, BinaryIO) not supported: {type(data)}" ) diff --git a/cognee/modules/ingestion/data_types/S3BinaryData.py b/cognee/modules/ingestion/data_types/S3BinaryData.py new file mode 100644 index 000000000..0536665c8 --- /dev/null +++ b/cognee/modules/ingestion/data_types/S3BinaryData.py @@ -0,0 +1,42 @@ +from typing import Optional +import s3fs +from cognee.infrastructure.files import get_file_metadata, FileMetadata +from .IngestionData import IngestionData + + +def create_s3_binary_data( + s3_path: str, name: Optional[str] = None, s3: Optional[s3fs.S3FileSystem] = None +) -> "S3BinaryData": + return S3BinaryData(s3_path, name=name, s3=s3) + + +class S3BinaryData(IngestionData): + name: Optional[str] = None + s3_path: str = None + fs: s3fs.S3FileSystem = None + metadata: Optional[FileMetadata] = None + + def __init__( + self, s3_path: str, name: Optional[str] = None, s3: Optional[s3fs.S3FileSystem] = None + ): + self.s3_path = s3_path + self.name = name + self.fs = s3 if s3 is not None else s3fs.S3FileSystem() + + def get_identifier(self): + metadata = self.get_metadata() + return metadata["content_hash"] + + def get_metadata(self): + self.ensure_metadata() + return self.metadata + + def ensure_metadata(self): + if self.metadata is None: + with self.fs.open(self.s3_path, "rb") as f: + self.metadata = get_file_metadata(f) + if self.metadata.get("name") is None: + self.metadata["name"] = self.name or self.s3_path.split("/")[-1] + + def get_data(self): + return self.fs.open(self.s3_path, "rb") diff --git a/cognee/modules/ingestion/data_types/__init__.py b/cognee/modules/ingestion/data_types/__init__.py index 2cc5796cd..f68306f80 100644 --- a/cognee/modules/ingestion/data_types/__init__.py +++ b/cognee/modules/ingestion/data_types/__init__.py @@ -1,3 +1,4 @@ from .TextData import TextData, create_text_data from .BinaryData import BinaryData, create_binary_data +from .S3BinaryData import S3BinaryData, create_s3_binary_data from .IngestionData import IngestionData diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index 78475d106..fe8a81be0 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -1,6 +1,7 @@ from typing import Any, List import dlt +import s3fs import cognee.modules.ingestion as ingestion from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.data.methods import create_dataset, get_dataset_data, get_datasets_by_name @@ -13,6 +14,8 @@ from .save_data_item_to_storage import save_data_item_to_storage from typing import Union, BinaryIO import inspect +from cognee.api.v1.add.config import get_s3_config + async def ingest_data(data: Any, dataset_name: str, user: User): destination = get_dlt_destination() @@ -22,6 +25,21 @@ async def ingest_data(data: Any, dataset_name: str, user: User): destination=destination, ) + s3_config = get_s3_config() + + fs = None + if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: + fs = s3fs.S3FileSystem( + key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False + ) + + def open_data_file(file_path: str): + if file_path.startswith("s3://"): + return fs.open(file_path, mode="rb") + else: + local_path = file_path.replace("file://", "") + return open(local_path, mode="rb") + def get_external_metadata_dict(data_item: Union[BinaryIO, str, Any]) -> dict[str, Any]: if hasattr(data_item, "dict") and inspect.ismethod(getattr(data_item, "dict")): return {"metadata": data_item.dict(), "origin": str(type(data_item))} @@ -31,8 +49,11 @@ async def ingest_data(data: Any, dataset_name: str, user: User): @dlt.resource(standalone=True, primary_key="id", merge_key="id") async def data_resources(file_paths: List[str], user: User): for file_path in file_paths: - with open(file_path.replace("file://", ""), mode="rb") as file: - classified_data = ingestion.classify(file) + with open_data_file(file_path) as file: + if file_path.startswith("s3://"): + classified_data = ingestion.classify(file, s3fs=fs) + else: + classified_data = ingestion.classify(file) data_id = ingestion.identify(classified_data, user) file_metadata = classified_data.get_metadata() yield { @@ -59,8 +80,9 @@ async def ingest_data(data: Any, dataset_name: str, user: User): file_paths.append(file_path) # Ingest data and add metadata - with open(file_path.replace("file://", ""), mode="rb") as file: - classified_data = ingestion.classify(file) + # with open(file_path.replace("file://", ""), mode="rb") as file: + with open_data_file(file_path) as file: + classified_data = ingestion.classify(file, s3fs=fs) # data_id is the hash of file contents + owner id to avoid duplicate data data_id = ingestion.identify(classified_data, user) diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py index e549688d8..0b3edc382 100644 --- a/cognee/tasks/ingestion/resolve_data_directories.py +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -1,5 +1,8 @@ import os +import s3fs from typing import List, Union, BinaryIO +from urllib.parse import urlparse +from cognee.api.v1.add.config import get_s3_config async def resolve_data_directories( @@ -20,10 +23,35 @@ async def resolve_data_directories( data = [data] resolved_data = [] + s3_config = get_s3_config() + + fs = None + if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: + fs = s3fs.S3FileSystem( + key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False + ) for item in data: if isinstance(item, str): # Check if the item is a path - if os.path.isdir(item): # If it's a directory + # S3 + if urlparse(item).scheme == "s3": + if fs is not None: + if include_subdirectories: + base_path = item if item.endswith("/") else item + "/" + s3_keys = fs.glob(base_path + "**") + else: + s3_keys = fs.ls(item) + # Filter out keys that represent directories using fs.isdir + s3_files = [] + for key in s3_keys: + if not fs.isdir(key): + if not key.startswith("s3://"): + s3_files.append("s3://" + key) + else: + s3_files.append(key) + resolved_data.extend(s3_files) + + elif os.path.isdir(item): # If it's a directory if include_subdirectories: # Recursively add all files in the directory and subdirectories for root, _, files in os.walk(item): diff --git a/cognee/tasks/ingestion/save_data_item_to_storage.py b/cognee/tasks/ingestion/save_data_item_to_storage.py index 3f9d572c9..976c1dd26 100644 --- a/cognee/tasks/ingestion/save_data_item_to_storage.py +++ b/cognee/tasks/ingestion/save_data_item_to_storage.py @@ -16,8 +16,10 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any], datase file_path = save_data_to_file(data_item.file, filename=data_item.filename) elif isinstance(data_item, str): + if data_item.startswith("s3://"): + file_path = data_item # data is a file path - if data_item.startswith("file://") or data_item.startswith("/"): + elif data_item.startswith("file://") or data_item.startswith("/"): file_path = data_item.replace("file://", "") # data is text else: diff --git a/cognee/tests/test_s3.py b/cognee/tests/test_s3.py new file mode 100644 index 000000000..888458d33 --- /dev/null +++ b/cognee/tests/test_s3.py @@ -0,0 +1,86 @@ +import logging + +import cognee +from cognee.shared.logging_utils import get_logger +from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine +from collections import Counter + + +logger = get_logger() + + +async def main(): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + s3_input = "s3://samples3input" + await cognee.add(s3_input) + + await cognee.cognify() + + graph_engine = await get_graph_engine() + graph = await graph_engine.get_graph() + + type_counts = Counter( + node_data["type"] for _, node_data in graph.nodes(data=True) if "type" in node_data + ) + + edge_type_counts = Counter(edge_type for _, _, edge_type in graph.edges(keys=True)) + + logging.info(type_counts) + + # Assert there is exactly one PdfDocument. + assert type_counts.get("PdfDocument", 0) == 1, ( + f"Expected exactly one PdfDocument, but found {type_counts.get('PdfDocument', 0)}" + ) + + # Assert there is exactly one TextDocument. + assert type_counts.get("TextDocument", 0) == 1, ( + f"Expected exactly one TextDocument, but found {type_counts.get('TextDocument', 0)}" + ) + + # Assert there are at least two DocumentChunk nodes. + assert type_counts.get("DocumentChunk", 0) >= 2, ( + f"Expected at least two DocumentChunk nodes, but found {type_counts.get('DocumentChunk', 0)}" + ) + + # Assert there is at least two TextSummary. + assert type_counts.get("TextSummary", 0) >= 2, ( + f"Expected at least two TextSummary, but found {type_counts.get('TextSummary', 0)}" + ) + + # Assert there is at least one Entity. + assert type_counts.get("Entity", 0) > 0, ( + f"Expected more than zero Entity nodes, but found {type_counts.get('Entity', 0)}" + ) + + # Assert there is at least one EntityType. + assert type_counts.get("EntityType", 0) > 0, ( + f"Expected more than zero EntityType nodes, but found {type_counts.get('EntityType', 0)}" + ) + + # Assert that there are at least two 'is_part_of' edges. + assert edge_type_counts.get("is_part_of", 0) >= 2, ( + f"Expected at least two 'is_part_of' edges, but found {edge_type_counts.get('is_part_of', 0)}" + ) + + # Assert that there are at least two 'made_from' edges. + assert edge_type_counts.get("made_from", 0) >= 2, ( + f"Expected at least two 'made_from' edges, but found {edge_type_counts.get('made_from', 0)}" + ) + + # Assert that there is at least one 'is_a' edge. + assert edge_type_counts.get("is_a", 0) >= 1, ( + f"Expected at least one 'is_a' edge, but found {edge_type_counts.get('is_a', 0)}" + ) + + # Assert that there is at least one 'contains' edge. + assert edge_type_counts.get("contains", 0) >= 1, ( + f"Expected at least one 'contains' edge, but found {edge_type_counts.get('contains', 0)}" + ) + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) diff --git a/poetry.lock b/poetry.lock index 317054fe0..5879918aa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,30 @@ -# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. + +[[package]] +name = "aiobotocore" +version = "2.21.1" +description = "Async client for aws services using botocore and aiohttp" +optional = false +python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"filesystem\"" +files = [ + {file = "aiobotocore-2.21.1-py3-none-any.whl", hash = "sha256:bd7c49a6d6f8a3d9444b0a94417c8da13813b5c7eec1c4f0ec2db7e8ce8f23e7"}, + {file = "aiobotocore-2.21.1.tar.gz", hash = "sha256:010357f43004413e92a9d066bb0db1f241aeb29ffed306e9197061ffc94e6577"}, +] + +[package.dependencies] +aiohttp = ">=3.9.2,<4.0.0" +aioitertools = ">=0.5.1,<1.0.0" +botocore = ">=1.37.0,<1.37.2" +jmespath = ">=0.7.1,<2.0.0" +multidict = ">=6.0.0,<7.0.0" +python-dateutil = ">=2.1,<3.0.0" +wrapt = ">=1.10.10,<2.0.0" + +[package.extras] +awscli = ["awscli (>=1.38.0,<1.38.2)"] +boto3 = ["boto3 (>=1.37.0,<1.37.2)"] [[package]] name = "aiofiles" @@ -128,6 +154,23 @@ yarl = ">=1.17.0,<2.0" [package.extras] speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +[[package]] +name = "aioitertools" +version = "0.12.0" +description = "itertools and builtins for AsyncIO and mixed iterables" +optional = false +python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"filesystem\"" +files = [ + {file = "aioitertools-0.12.0-py3-none-any.whl", hash = "sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796"}, + {file = "aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b"}, +] + +[package.extras] +dev = ["attribution (==1.8.0)", "black (==24.8.0)", "build (>=1.2)", "coverage (==7.6.1)", "flake8 (==7.1.1)", "flit (==3.9.0)", "mypy (==1.11.2)", "ufmt (==2.7.1)", "usort (==1.0.8.post1)"] +docs = ["sphinx (==8.0.2)", "sphinx-mdinclude (==0.6.2)"] + [[package]] name = "aiosignal" version = "1.3.2" @@ -408,7 +451,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -565,7 +608,7 @@ description = "Backport of CPython tarfile module" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "python_version < \"3.12\" and extra == \"deepeval\"" +markers = "extra == \"deepeval\" and python_version <= \"3.11\"" files = [ {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"}, {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"}, @@ -774,18 +817,18 @@ css = ["tinycss2 (>=1.1.0,<1.5)"] [[package]] name = "boto3" -version = "1.37.33" +version = "1.37.1" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "boto3-1.37.33-py3-none-any.whl", hash = "sha256:7b1b1bc69762975824e5a5d570880abebf634f7594f88b3dc175e8800f35be1a"}, - {file = "boto3-1.37.33.tar.gz", hash = "sha256:4390317a1578af73f1514651bd180ba25802dcbe0a23deafa13851d54d3c3203"}, + {file = "boto3-1.37.1-py3-none-any.whl", hash = "sha256:4320441f904435a1b85e6ecb81793192e522c737cc9ed6566014e29f0a11cb22"}, + {file = "boto3-1.37.1.tar.gz", hash = "sha256:96d18f7feb0c1fcb95f8837b74b6c8880e1b4e35ce5f8a8f8cb243a090c278ed"}, ] [package.dependencies] -botocore = ">=1.37.33,<1.38.0" +botocore = ">=1.37.1,<1.38.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.11.0,<0.12.0" @@ -794,14 +837,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.37.33" +version = "1.37.1" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "botocore-1.37.33-py3-none-any.whl", hash = "sha256:4a167dfecae51e9140de24067de1c339acde5ade3dad524a4600ac2c72055e23"}, - {file = "botocore-1.37.33.tar.gz", hash = "sha256:09b213b0d0500040f85c7daee912ea767c724e43ed61909e624c803ff6925222"}, + {file = "botocore-1.37.1-py3-none-any.whl", hash = "sha256:c1db1bfc5d8c6b3b6d1ca6794f605294b4264e82a7e727b88e0fef9c2b9fbb9c"}, + {file = "botocore-1.37.1.tar.gz", hash = "sha256:b194db8fb2a0ffba53568c364ae26166e7eec0445496b2ac86a6e142f3dd982f"}, ] [package.dependencies] @@ -1183,7 +1226,7 @@ files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "(platform_system == \"Windows\" or sys_platform == \"win32\" or os_name == \"nt\" or extra == \"llama-index\" or extra == \"deepeval\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"chromadb\" or extra == \"llama-index\" or extra == \"deepeval\")", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} +markers = {main = "(platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"chromadb\" or extra == \"llama-index\" or extra == \"deepeval\") and (os_name == \"nt\" or platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\")", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} [[package]] name = "coloredlogs" @@ -1192,7 +1235,7 @@ description = "Colored terminal output for Python's logging module" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] -markers = "(extra == \"codegraph\" or extra == \"chromadb\") and python_version < \"3.13\" or extra == \"chromadb\"" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, @@ -1806,7 +1849,7 @@ description = "Python datetimes made easy" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.13\"" files = [ {file = "dlt_pendulum-3.0.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9ae1222828474f9e4743f8929f8026abe2d0b3a99427a483da2868690b017332"}, {file = "dlt_pendulum-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:75e1b758f88f887706902438fa5b293f11cec5d656c6540c9957da8c9b953198"}, @@ -1989,7 +2032,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -2115,7 +2158,7 @@ description = "Fast, light, accurate library built for retrieval embedding gener optional = true python-versions = ">=3.9.0" groups = ["main"] -markers = "extra == \"codegraph\" and python_version < \"3.13\"" +markers = "python_version < \"3.13\" and extra == \"codegraph\"" files = [ {file = "fastembed-0.6.0-py3-none-any.whl", hash = "sha256:a08385e9388adea0529a586004f2d588c9787880a510e4e5d167127a11e75328"}, {file = "fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733"}, @@ -2187,7 +2230,7 @@ description = "The FlatBuffers serialization format for Python" optional = true python-versions = "*" groups = ["main"] -markers = "(extra == \"codegraph\" or extra == \"chromadb\") and python_version < \"3.13\" or extra == \"chromadb\"" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051"}, {file = "flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e"}, @@ -2661,7 +2704,7 @@ description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"chromadb\" or extra == \"deepeval\"" +markers = "extra == \"gemini\" or extra == \"chromadb\" or extra == \"deepeval\" or python_version < \"3.11\" and (extra == \"chromadb\" or extra == \"deepeval\" or extra == \"gemini\")" files = [ {file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"}, {file = "googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"}, @@ -2866,7 +2909,7 @@ description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"chromadb\" or extra == \"deepeval\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"milvus\"" +markers = "extra == \"gemini\" or extra == \"chromadb\" or extra == \"deepeval\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"milvus\" or python_version < \"3.11\" and (extra == \"chromadb\" or extra == \"deepeval\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"milvus\" or extra == \"gemini\")" files = [ {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, {file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"}, @@ -3326,7 +3369,7 @@ description = "Human friendly output for text interfaces using Python" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] -markers = "(extra == \"codegraph\" or extra == \"chromadb\") and python_version < \"3.13\" or extra == \"chromadb\"" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, @@ -3527,7 +3570,7 @@ description = "IPython: Productive Interactive Computing" optional = true python-versions = ">=3.10" groups = ["main", "dev"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "ipython-8.35.0-py3-none-any.whl", hash = "sha256:e6b7470468ba6f1f0a7b116bb688a3ece2f13e2f94138e508201fad677a788ba"}, {file = "ipython-8.35.0.tar.gz", hash = "sha256:d200b7d93c3f5883fc36ab9ce28a18249c7706e51347681f80a0aef9895f2520"}, @@ -3732,7 +3775,7 @@ description = "Low-level, pure Python DBus protocol wrapper." optional = true python-versions = ">=3.7" groups = ["main"] -markers = "sys_platform == \"linux\" and extra == \"deepeval\"" +markers = "extra == \"deepeval\" and sys_platform == \"linux\"" files = [ {file = "jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683"}, {file = "jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732"}, @@ -4999,7 +5042,7 @@ description = "Python logging made (stupidly) simple" optional = true python-versions = "<4.0,>=3.5" groups = ["main"] -markers = "extra == \"codegraph\" and python_version < \"3.13\"" +markers = "python_version < \"3.13\" and extra == \"codegraph\"" files = [ {file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"}, {file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"}, @@ -5450,7 +5493,7 @@ description = "A lightweight version of Milvus wrapped with Python." optional = true python-versions = ">=3.7" groups = ["main"] -markers = "sys_platform != \"win32\" and extra == \"milvus\"" +markers = "extra == \"milvus\" and sys_platform != \"win32\"" files = [ {file = "milvus_lite-2.4.12-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:e8d4f7cdd5f731efd6faeee3715d280fd91a5f9b4d89312664d56401f65b1473"}, {file = "milvus_lite-2.4.12-py3-none-macosx_11_0_arm64.whl", hash = "sha256:20087663e7b4385050b7ad08f1f03404426d4c87b1ff91d5a8723eee7fd49e88"}, @@ -5805,7 +5848,7 @@ description = "Python library for arbitrary-precision floating-point arithmetic" optional = true python-versions = "*" groups = ["main"] -markers = "(extra == \"codegraph\" or extra == \"chromadb\") and python_version < \"3.13\" or extra == \"chromadb\"" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -6380,7 +6423,7 @@ description = "ONNX Runtime is a runtime accelerator for Machine Learning models optional = true python-versions = ">=3.10" groups = ["main"] -markers = "(extra == \"codegraph\" or extra == \"chromadb\") and python_version < \"3.13\" or extra == \"chromadb\"" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "onnxruntime-1.21.0-cp310-cp310-macosx_13_0_universal2.whl", hash = "sha256:95513c9302bc8dd013d84148dcf3168e782a80cdbf1654eddc948a23147ccd3d"}, {file = "onnxruntime-1.21.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:635d4ab13ae0f150dd4c6ff8206fd58f1c6600636ecc796f6f0c42e4c918585b"}, @@ -6853,8 +6896,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -7488,7 +7531,7 @@ description = "" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "(extra == \"codegraph\" or extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"gemini\" or extra == \"deepeval\" or extra == \"milvus\") and python_version < \"3.13\" or extra == \"gemini\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"chromadb\" or extra == \"deepeval\" or extra == \"milvus\"" +markers = "python_version == \"3.10\" and extra == \"codegraph\" or (extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"gemini\" or extra == \"milvus\") and python_version < \"3.11\" or (python_version == \"3.12\" or extra == \"gemini\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"chromadb\" or extra == \"deepeval\" or extra == \"milvus\") and (extra == \"codegraph\" or extra == \"gemini\" or extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"milvus\") and python_version >= \"3.12\" or python_version == \"3.11\" and (extra == \"codegraph\" or extra == \"gemini\" or extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"milvus\")" files = [ {file = "protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7"}, {file = "protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d"}, @@ -7602,7 +7645,7 @@ description = "Fast and parallel snowball stemmer" optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"codegraph\" and python_version < \"3.13\"" +markers = "python_version < \"3.13\" and extra == \"codegraph\"" files = [ {file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:bfbd9034ae00419ff2154e33b8f5b4c4d99d1f9271f31ed059e5c7e9fa005844"}, {file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7162ae66df2bb0fc39b350c24a049f5f5151c03c046092ba095c2141ec223a2"}, @@ -8004,8 +8047,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.6", markers = "python_version >= \"3.11\""}, {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, + {version = ">=0.3.6", markers = "python_version == \"3.11\""}, ] isort = ">=4.2.5,<5.13 || >5.13,<7" mccabe = ">=0.6,<0.8" @@ -8146,7 +8189,7 @@ description = "A python implementation of GNU readline." optional = true python-versions = ">=3.8" groups = ["main"] -markers = "sys_platform == \"win32\" and (extra == \"codegraph\" or extra == \"chromadb\") and python_version < \"3.13\" or sys_platform == \"win32\" and extra == \"chromadb\"" +markers = "sys_platform == \"win32\" and (python_version == \"3.10\" or extra == \"chromadb\" or extra == \"codegraph\") and (extra == \"chromadb\" or python_version == \"3.12\" or python_version == \"3.10\" or python_version == \"3.11\") and (extra == \"codegraph\" or extra == \"chromadb\")" files = [ {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"}, @@ -8503,7 +8546,7 @@ files = [ {file = "pywin32-310-cp39-cp39-win32.whl", hash = "sha256:851c8d927af0d879221e616ae1f66145253537bbdd321a77e8ef701b443a9a1a"}, {file = "pywin32-310-cp39-cp39-win_amd64.whl", hash = "sha256:96867217335559ac619f00ad70e513c0fcf84b8a3af9fc2bba3b59b97da70475"}, ] -markers = {main = "(platform_system == \"Windows\" or sys_platform == \"win32\") and (extra == \"qdrant\" or extra == \"deepeval\" or sys_platform == \"win32\")", dev = "platform_python_implementation != \"PyPy\" and sys_platform == \"win32\""} +markers = {main = "(extra == \"qdrant\" or extra == \"deepeval\") and platform_system == \"Windows\" or sys_platform == \"win32\"", dev = "sys_platform == \"win32\" and platform_python_implementation != \"PyPy\""} [[package]] name = "pywin32-ctypes" @@ -8512,7 +8555,7 @@ description = "A (partial) reimplementation of pywin32 using ctypes/cffi" optional = true python-versions = ">=3.6" groups = ["main"] -markers = "sys_platform == \"win32\" and extra == \"deepeval\"" +markers = "extra == \"deepeval\" and sys_platform == \"win32\"" files = [ {file = "pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755"}, {file = "pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8"}, @@ -8740,7 +8783,7 @@ description = "Client library for the Qdrant vector search engine" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "python_full_version == \"3.13.0\" and extra == \"qdrant\"" +markers = "python_version >= \"3.13\" and extra == \"qdrant\"" files = [ {file = "qdrant_client-1.12.1-py3-none-any.whl", hash = "sha256:b2d17ce18e9e767471368380dd3bbc4a0e3a0e2061fedc9af3542084b48451e0"}, {file = "qdrant_client-1.12.1.tar.gz", hash = "sha256:35e8e646f75b7b883b3d2d0ee4c69c5301000bba41c82aa546e985db0f1aeb72"}, @@ -8766,7 +8809,7 @@ description = "Client library for the Qdrant vector search engine" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"qdrant\" and python_version < \"3.13\"" +markers = "python_version < \"3.13\" and extra == \"qdrant\"" files = [ {file = "qdrant_client-1.13.3-py3-none-any.whl", hash = "sha256:f52cacbb936e547d3fceb1aaed3e3c56be0ebfd48e8ea495ea3dbc89c671d1d2"}, {file = "qdrant_client-1.13.3.tar.gz", hash = "sha256:61ca09e07c6d7ac0dfbdeb13dca4fe5f3e08fa430cb0d74d66ef5d023a70adfc"}, @@ -9379,23 +9422,45 @@ files = [ {file = "ruff-0.11.5.tar.gz", hash = "sha256:cae2e2439cb88853e421901ec040a758960b576126dab520fa08e9de431d1bef"}, ] +[[package]] +name = "s3fs" +version = "2025.3.2" +description = "Convenient Filesystem interface over S3" +optional = false +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"filesystem\"" +files = [ + {file = "s3fs-2025.3.2-py3-none-any.whl", hash = "sha256:81eae3f37b4b04bcc08845d7bcc607c6ca45878813ef7e6a28d77b2688417130"}, + {file = "s3fs-2025.3.2.tar.gz", hash = "sha256:6798f896ec76dd3bfd8beb89f0bb7c5263cb2760e038bae0978505cd172a307c"}, +] + +[package.dependencies] +aiobotocore = ">=2.5.4,<3.0.0" +aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" +fsspec = "==2025.3.2.*" + +[package.extras] +awscli = ["aiobotocore[awscli] (>=2.5.4,<3.0.0)"] +boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"] + [[package]] name = "s3transfer" -version = "0.11.4" +version = "0.11.3" description = "An Amazon S3 Transfer Manager" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "s3transfer-0.11.4-py3-none-any.whl", hash = "sha256:ac265fa68318763a03bf2dc4f39d5cbd6a9e178d81cc9483ad27da33637e320d"}, - {file = "s3transfer-0.11.4.tar.gz", hash = "sha256:559f161658e1cf0a911f45940552c696735f5c74e64362e515f333ebed87d679"}, + {file = "s3transfer-0.11.3-py3-none-any.whl", hash = "sha256:ca855bdeb885174b5ffa95b9913622459d4ad8e331fc98eb01e6d5eb6a30655d"}, + {file = "s3transfer-0.11.3.tar.gz", hash = "sha256:edae4977e3a122445660c7c114bba949f9d191bae3b34a096f18a1c8c354527a"}, ] [package.dependencies] -botocore = ">=1.37.4,<2.0a.0" +botocore = ">=1.36.0,<2.0a.0" [package.extras] -crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"] +crt = ["botocore[crt] (>=1.36.0,<2.0a.0)"] [[package]] name = "safetensors" @@ -9562,7 +9627,7 @@ description = "Python bindings to FreeDesktop.org Secret Service API" optional = true python-versions = ">=3.6" groups = ["main"] -markers = "sys_platform == \"linux\" and extra == \"deepeval\"" +markers = "extra == \"deepeval\" and sys_platform == \"linux\"" files = [ {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"}, {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"}, @@ -10143,7 +10208,7 @@ description = "Computer algebra system (CAS) in Python" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "(extra == \"codegraph\" or extra == \"chromadb\") and python_version < \"3.13\" or extra == \"chromadb\"" +markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" files = [ {file = "sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73"}, {file = "sympy-1.13.3.tar.gz", hash = "sha256:b27fd2c6530e0ab39e275fc9b683895367e51d5da91baa8d3d64db2565fec4d9"}, @@ -10328,7 +10393,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["main", "dev"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -10982,7 +11047,7 @@ description = "Fast implementation of asyncio event loop on top of libuv" optional = true python-versions = ">=3.8.0" groups = ["main"] -markers = "platform_python_implementation != \"PyPy\" and extra == \"chromadb\" and sys_platform != \"win32\" and sys_platform != \"cygwin\"" +markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"chromadb\"" files = [ {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"}, {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"}, @@ -11396,7 +11461,7 @@ description = "A small Python utility to set file creation time on Windows" optional = true python-versions = ">=3.5" groups = ["main"] -markers = "sys_platform == \"win32\" and extra == \"codegraph\" and python_version < \"3.13\"" +markers = "extra == \"codegraph\" and sys_platform == \"win32\" and python_version < \"3.13\"" files = [ {file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"}, {file = "win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0"}, @@ -11656,7 +11721,7 @@ deepeval = ["deepeval"] docs = ["unstructured"] evals = ["gdown", "plotly"] falkordb = ["falkordb"] -filesystem = ["botocore"] +filesystem = ["botocore", "s3fs"] gemini = ["google-generativeai"] graphiti = ["graphiti-core"] groq = ["groq"] @@ -11678,4 +11743,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "c46737c9aa42018f91e663459a98c8c10a964dff47b91679bab5085e8b98faa6" +content-hash = "cef734016cd8fc4430277b30c639d0699361f0553c00fdd8fa0fbadbab0b7e7b" diff --git a/pyproject.toml b/pyproject.toml index 6ca55a4c6..99a081e46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,7 @@ structlog = "^25.2.0" pyside6 = {version = "^6.8.3", optional = true} google-generativeai = {version = "^0.8.4", optional = true} notebook = {version = "^7.1.0", optional = true} +s3fs = "^2025.3.2" [tool.poetry.extras]