make advanced pdf loader optional

Signed-off-by: EricXiao <taoiaox@gmail.com>
This commit is contained in:
EricXiao 2025-09-22 15:07:58 +08:00
parent d12ec0bc4f
commit 7fae75d020
8 changed files with 178 additions and 140 deletions

View file

@ -87,7 +87,7 @@ class LoaderEngine:
if loader.can_handle(extension=file_info.extension, mime_type=file_info.mime):
return loader
else:
raise ValueError(f"Loader does not exist: {loader_name}")
logger.info(f"Skipping {loader_name}: Preferred Loader not registered")
# Try default priority order
for loader_name in self.default_loader_priority:
@ -96,7 +96,9 @@ class LoaderEngine:
if loader.can_handle(extension=file_info.extension, mime_type=file_info.mime):
return loader
else:
raise ValueError(f"Loader does not exist: {loader_name}")
logger.info(
f"Skipping {loader_name}: Loader not registered (in default priority list)."
)
return None

View file

@ -9,10 +9,9 @@ This module contains loaders that depend on external libraries:
These loaders are optional and only available if their dependencies are installed.
"""
from .advanced_pdf_loader import AdvancedPdfLoader
from .pypdf_loader import PyPdfLoader
__all__ = ["AdvancedPdfLoader", "PyPdfLoader"]
__all__ = ["PyPdfLoader"]
# Conditional imports based on dependency availability
try:
@ -21,3 +20,10 @@ try:
__all__.append("UnstructuredLoader")
except ImportError:
pass
# AdvancedPdfLoader is exported only when its module can be imported.
try:
    from .advanced_pdf_loader import AdvancedPdfLoader
except ImportError:
    # Import failed: leave AdvancedPdfLoader out of __all__.
    pass
else:
    __all__.append("AdvancedPdfLoader")

View file

@ -14,6 +14,14 @@ from cognee.infrastructure.loaders.external.pypdf_loader import PyPdfLoader
logger = get_logger(__name__)
# partition_pdf comes from the optional unstructured[pdf] dependency. When the
# import fails, log the reason and let the ImportError propagate so that code
# importing this module can catch it.
try:
    from unstructured.partition.pdf import partition_pdf
except ImportError:
    logger.info(
        "unstructured[pdf] not installed, can't use AdvancedPdfLoader, will use PyPdfLoader instead."
    )
    # Bare re-raise keeps the original exception (its message and the name of
    # the missing module) instead of replacing it with an empty ImportError.
    raise
@dataclass
class _PageBuffer:
@ -61,16 +69,6 @@ class AdvancedPdfLoader(LoaderInterface):
LoaderResult with extracted text content and metadata
"""
try:
from unstructured.partition.pdf import partition_pdf
except ImportError:
logger.warning(
"unstructured[pdf] not installed, can't use AdvancedPdfLoader, using PyPDF fallback."
)
return await self._fallback(file_path, **kwargs)
try:
logger.info(f"Processing PDF: {file_path}")

View file

@ -1,9 +1,8 @@
from cognee.infrastructure.loaders.external import AdvancedPdfLoader, PyPdfLoader
from cognee.infrastructure.loaders.external import PyPdfLoader
from cognee.infrastructure.loaders.core import TextLoader, AudioLoader, ImageLoader
# Registry for loader implementations
supported_loaders = {
AdvancedPdfLoader.loader_name: AdvancedPdfLoader,
PyPdfLoader.loader_name: PyPdfLoader,
TextLoader.loader_name: TextLoader,
ImageLoader.loader_name: ImageLoader,
@ -17,3 +16,10 @@ try:
supported_loaders[UnstructuredLoader.loader_name] = UnstructuredLoader
except ImportError:
pass
# Register AdvancedPdfLoader only when it can be imported.
try:
    from cognee.infrastructure.loaders.external import AdvancedPdfLoader
except ImportError:
    # Import failed: the registry is left without an AdvancedPdfLoader entry.
    pass
else:
    supported_loaders[AdvancedPdfLoader.loader_name] = AdvancedPdfLoader

View file

@ -42,29 +42,6 @@ def test_can_handle(loader, extension, mime_type, expected):
assert loader.can_handle(extension, mime_type) == expected
@pytest.mark.asyncio
@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader")
@patch(
"unstructured.partition.pdf.partition_pdf",
side_effect=ImportError("unstructured not installed"),
)
async def test_load_fallback_on_import_error(mock_partition_pdf, mock_pypdf_loader, loader):
"""Test fallback to PyPdfLoader when unstructured is not installed"""
# Prepare Mock
mock_fallback_instance = MagicMock()
mock_fallback_instance.load = AsyncMock(return_value="/fake/path/fallback.txt")
mock_pypdf_loader.return_value = mock_fallback_instance
test_file_path = "/fake/path/to/document.pdf"
# Run
result_path = await loader.load(test_file_path)
# Assert
assert result_path == "/fake/path/fallback.txt"
mock_partition_pdf.assert_not_called() # partition_pdf should not be called
mock_fallback_instance.load.assert_awaited_once_with(test_file_path)
@pytest.mark.asyncio
@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.open", new_callable=mock_open)
@patch(
@ -74,7 +51,7 @@ async def test_load_fallback_on_import_error(mock_partition_pdf, mock_pypdf_load
@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.get_storage_config")
@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.get_file_storage")
@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader")
@patch("unstructured.partition.pdf.partition_pdf")
@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.partition_pdf")
async def test_load_success_with_unstructured(
mock_partition_pdf,
mock_pypdf_loader,
@ -141,7 +118,7 @@ async def test_load_success_with_unstructured(
)
@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader")
@patch(
"unstructured.partition.pdf.partition_pdf",
"cognee.infrastructure.loaders.external.advanced_pdf_loader.partition_pdf",
side_effect=Exception("Unstructured failed!"),
)
async def test_load_fallback_on_unstructured_exception(

238
poetry.lock generated
View file

@ -4,9 +4,10 @@
name = "accelerate"
version = "1.10.1"
description = "Accelerate"
optional = false
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "accelerate-1.10.1-py3-none-any.whl", hash = "sha256:3621cff60b9a27ce798857ece05e2b9f56fcc71631cfb31ccf71f0359c311f11"},
{file = "accelerate-1.10.1.tar.gz", hash = "sha256:3dea89e433420e4bfac0369cae7e36dcd6a56adfcfd38cdda145c6225eab5df8"},
@ -308,9 +309,10 @@ vertex = ["google-auth[requests] (>=2,<3)"]
name = "antlr4-python3-runtime"
version = "4.9.3"
description = "ANTLR 4.9.3 runtime for Python 3.7"
optional = false
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"},
]
@ -758,9 +760,10 @@ typecheck = ["mypy"]
name = "beautifulsoup4"
version = "4.13.5"
description = "Screen-scraping library"
optional = false
optional = true
python-versions = ">=3.7.0"
groups = ["main"]
markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\" or extra == \"evals\""
files = [
{file = "beautifulsoup4-4.13.5-py3-none-any.whl", hash = "sha256:642085eaa22233aceadff9c69651bc51e8bf3f874fb6d7104ece2beb24b47c4a"},
{file = "beautifulsoup4-4.13.5.tar.gz", hash = "sha256:5e70131382930e7c3de33450a2f54a63d5e4b19386eab43a5b34d594268f3695"},
@ -865,9 +868,10 @@ virtualenv = ["virtualenv (>=20.11) ; python_version < \"3.10\"", "virtualenv (>
name = "cachetools"
version = "5.5.2"
description = "Extensible memoizing collections and decorators"
optional = false
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\""
files = [
{file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"},
{file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"},
@ -1582,9 +1586,10 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"]
name = "dataclasses-json"
version = "0.6.7"
description = "Easily serialize dataclasses to and from JSON."
optional = false
optional = true
python-versions = "<4.0,>=3.7"
groups = ["main"]
markers = "extra == \"llama-index\" or extra == \"docs\""
files = [
{file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"},
{file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"},
@ -1648,9 +1653,10 @@ files = [
name = "deepdiff"
version = "8.6.0"
description = "Deep Difference and Search of any Python object/data. Recreate objects by adding adding deltas to each other."
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "deepdiff-8.6.0-py3-none-any.whl", hash = "sha256:db80677a434ac1f84147fd1598e93f1beb06d467e107af45fcf77cf8a681169f"},
{file = "deepdiff-8.6.0.tar.gz", hash = "sha256:6197216c2d777c3106a9989055c230e25848e599b26dcbcdc66226bd8d7fe901"},
@ -1984,9 +1990,10 @@ files = [
name = "effdet"
version = "0.4.1"
description = "EfficientDet for PyTorch"
optional = false
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "effdet-0.4.1-py3-none-any.whl", hash = "sha256:10889a226228d515c948e3fcf811e64c0d78d7aa94823a300045653b9c284cb7"},
{file = "effdet-0.4.1.tar.gz", hash = "sha256:ac5589fd304a5650c201986b2ef5f8e10c111093a71b1c49fa6b8817710812b5"},
@ -2019,9 +2026,10 @@ idna = ">=2.0.0"
name = "emoji"
version = "2.14.1"
description = "Emoji for Python"
optional = false
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "emoji-2.14.1-py3-none-any.whl", hash = "sha256:35a8a486c1460addb1499e3bf7929d3889b2e2841a57401903699fef595e942b"},
{file = "emoji-2.14.1.tar.gz", hash = "sha256:f8c50043d79a2c1410ebfae833ae1868d5941a67a6cd4d18377e2eb0bd79346b"},
@ -2616,9 +2624,10 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4
name = "google-api-core"
version = "2.25.1"
description = "Google API client core library"
optional = false
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\""
files = [
{file = "google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7"},
{file = "google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8"},
@ -2672,9 +2681,10 @@ uritemplate = ">=3.0.1,<5"
name = "google-auth"
version = "2.40.3"
description = "Google Authentication Library"
optional = false
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\""
files = [
{file = "google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca"},
{file = "google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77"},
@ -2716,9 +2726,10 @@ httplib2 = ">=0.19.0"
name = "google-cloud-vision"
version = "3.10.2"
description = "Google Cloud Vision API client library"
optional = false
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "google_cloud_vision-3.10.2-py3-none-any.whl", hash = "sha256:42a17fbc2219b0a88e325e2c1df6664a8dafcbae66363fb37ebcb511b018fc87"},
{file = "google_cloud_vision-3.10.2.tar.gz", hash = "sha256:649380faab8933440b632bf88072c0c382a08d49ab02bc0b4fba821882ae1765"},
@ -2788,9 +2799,10 @@ dev = ["Pillow", "absl-py", "black", "ipython", "nose2", "pandas", "pytype", "py
name = "googleapis-common-protos"
version = "1.70.0"
description = "Common protobufs used in Google APIs"
optional = false
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\""
files = [
{file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"},
{file = "googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"},
@ -2937,9 +2949,10 @@ aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.8)"]
name = "grpcio"
version = "1.74.0"
description = "HTTP/2-based RPC framework"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\""
files = [
{file = "grpcio-1.74.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:85bd5cdf4ed7b2d6438871adf6afff9af7096486fcf51818a81b77ef4dd30907"},
{file = "grpcio-1.74.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:68c8ebcca945efff9d86d8d6d7bfb0841cf0071024417e2d7f45c5e46b5b08eb"},
@ -3001,9 +3014,10 @@ protobuf = ["grpcio-tools (>=1.74.0)"]
name = "grpcio-status"
version = "1.71.2"
description = "Status proto mapping for gRPC"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\""
files = [
{file = "grpcio_status-1.71.2-py3-none-any.whl", hash = "sha256:803c98cb6a8b7dc6dbb785b1111aed739f241ab5e9da0bba96888aa74704cfd3"},
{file = "grpcio_status-1.71.2.tar.gz", hash = "sha256:c7a97e176df71cdc2c179cd1847d7fc86cca5832ad12e9798d7fed6b7a1aab50"},
@ -3038,10 +3052,9 @@ protobuf = ["protobuf (>=3.20.0)"]
name = "gunicorn"
version = "23.0.0"
description = "WSGI HTTP Server for UNIX"
optional = true
optional = false
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"api\""
files = [
{file = "gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d"},
{file = "gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec"},
@ -3142,9 +3155,10 @@ files = [
name = "html5lib"
version = "1.1"
description = "HTML parser based on the WHATWG HTML specification"
optional = false
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d"},
{file = "html5lib-1.1.tar.gz", hash = "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f"},
@ -3897,9 +3911,10 @@ ply = "*"
name = "jsonpath-python"
version = "1.0.6"
description = "A more powerful JSONPath implementation in modern python"
optional = false
optional = true
python-versions = ">=3.6"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"},
{file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"},
@ -4478,9 +4493,10 @@ langchain-core = ">=0.3.72,<1.0.0"
name = "langdetect"
version = "1.0.9"
description = "Language detection library ported from Google's language-detection."
optional = false
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"},
{file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"},
@ -4732,9 +4748,10 @@ dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; pytho
name = "lxml"
version = "6.0.1"
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "lxml-6.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3b38e20c578149fdbba1fd3f36cb1928a3aaca4b011dfd41ba09d11fb396e1b9"},
{file = "lxml-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:11a052cbd013b7140bbbb38a14e2329b6192478344c99097e378c691b7119551"},
@ -5008,9 +5025,10 @@ files = [
name = "marshmallow"
version = "3.26.1"
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"llama-index\" or extra == \"docs\""
files = [
{file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"},
{file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"},
@ -5389,9 +5407,10 @@ mkdocstrings = ">=0.26"
name = "ml-dtypes"
version = "0.5.3"
description = "ml_dtypes is a stand-alone implementation of several NumPy dtype extensions used in machine learning."
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "ml_dtypes-0.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0a1d68a7cb53e3f640b2b6a34d12c0542da3dd935e560fdf463c0c77f339fc20"},
{file = "ml_dtypes-0.5.3-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cd5a6c711b5350f3cbc2ac28def81cd1c580075ccb7955e61e9d8f4bfd40d24"},
@ -5846,9 +5865,10 @@ reports = ["lxml"]
name = "mypy-extensions"
version = "1.1.0"
description = "Type system extensions for programs checked with the mypy type checker."
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"llama-index\" or extra == \"docs\" or extra == \"dev\""
files = [
{file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"},
{file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"},
@ -5989,9 +6009,10 @@ pyarrow = ["pyarrow (>=1.0.0)"]
name = "nest-asyncio"
version = "1.6.0"
description = "Patch asyncio to allow nested event loops"
optional = false
optional = true
python-versions = ">=3.5"
groups = ["main"]
markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"docs\""
files = [
{file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"},
{file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"},
@ -6258,10 +6279,10 @@ files = [
name = "nvidia-cublas-cu12"
version = "12.8.4.1"
description = "CUBLAS native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0"},
{file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142"},
@ -6272,10 +6293,10 @@ files = [
name = "nvidia-cuda-cupti-cu12"
version = "12.8.90"
description = "CUDA profiling tools runtime libs."
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed"},
{file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182"},
@ -6286,10 +6307,10 @@ files = [
name = "nvidia-cuda-nvrtc-cu12"
version = "12.8.93"
description = "NVRTC native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994"},
{file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8"},
@ -6300,10 +6321,10 @@ files = [
name = "nvidia-cuda-runtime-cu12"
version = "12.8.90"
description = "CUDA Runtime native Libraries"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d"},
{file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90"},
@ -6314,10 +6335,10 @@ files = [
name = "nvidia-cudnn-cu12"
version = "9.10.2.21"
description = "cuDNN runtime libraries"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8"},
{file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8"},
@ -6331,10 +6352,10 @@ nvidia-cublas-cu12 = "*"
name = "nvidia-cufft-cu12"
version = "11.3.3.83"
description = "CUFFT native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a"},
{file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74"},
@ -6348,10 +6369,10 @@ nvidia-nvjitlink-cu12 = "*"
name = "nvidia-cufile-cu12"
version = "1.13.1.3"
description = "cuFile GPUDirect libraries"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc"},
{file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a"},
@ -6361,10 +6382,10 @@ files = [
name = "nvidia-curand-cu12"
version = "10.3.9.90"
description = "CURAND native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd"},
{file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9"},
@ -6375,10 +6396,10 @@ files = [
name = "nvidia-cusolver-cu12"
version = "11.7.3.90"
description = "CUDA solver native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0"},
{file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450"},
@ -6394,10 +6415,10 @@ nvidia-nvjitlink-cu12 = "*"
name = "nvidia-cusparse-cu12"
version = "12.5.8.93"
description = "CUSPARSE native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc"},
{file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b"},
@ -6411,10 +6432,10 @@ nvidia-nvjitlink-cu12 = "*"
name = "nvidia-cusparselt-cu12"
version = "0.7.1"
description = "NVIDIA cuSPARSELt"
optional = false
optional = true
python-versions = "*"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5"},
{file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623"},
@ -6425,10 +6446,10 @@ files = [
name = "nvidia-nccl-cu12"
version = "2.27.3"
description = "NVIDIA Collective Communication Library (NCCL) Runtime"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ddf1a245abc36c550870f26d537a9b6087fb2e2e3d6e0ef03374c6fd19d984f"},
{file = "nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039"},
@ -6438,10 +6459,10 @@ files = [
name = "nvidia-nvjitlink-cu12"
version = "12.8.93"
description = "Nvidia JIT LTO Library"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88"},
{file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7"},
@ -6452,10 +6473,10 @@ files = [
name = "nvidia-nvtx-cu12"
version = "12.8.90"
description = "NVIDIA Tools Extension"
optional = false
optional = true
python-versions = ">=3"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615"},
{file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f"},
@ -6484,9 +6505,10 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
name = "olefile"
version = "0.47"
description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)"
optional = false
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f"},
{file = "olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c"},
@ -6516,9 +6538,10 @@ pydantic = ">=2.9"
name = "omegaconf"
version = "2.3.0"
description = "A flexible configuration library"
optional = false
optional = true
python-versions = ">=3.6"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"},
{file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"},
@ -6532,9 +6555,10 @@ PyYAML = ">=5.1.0"
name = "onnx"
version = "1.19.0"
description = "Open Neural Network Exchange"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "onnx-1.19.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:e927d745939d590f164e43c5aec7338c5a75855a15130ee795f492fc3a0fa565"},
{file = "onnx-1.19.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c6cdcb237c5c4202463bac50417c5a7f7092997a8469e8b7ffcd09f51de0f4a9"},
@ -6649,9 +6673,10 @@ voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"]
name = "opencv-python"
version = "4.11.0.86"
description = "Wrapper package for OpenCV python bindings."
optional = false
optional = true
python-versions = ">=3.6"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "opencv-python-4.11.0.86.tar.gz", hash = "sha256:03d60ccae62304860d232272e4a4fda93c39d595780cb40b161b310244b736a4"},
{file = "opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:432f67c223f1dc2824f5e73cdfcd9db0efc8710647d4e813012195dc9122a52a"},
@ -6877,9 +6902,10 @@ files = [
name = "orderly-set"
version = "5.5.0"
description = "Orderly set"
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "orderly_set-5.5.0-py3-none-any.whl", hash = "sha256:46f0b801948e98f427b412fcabb831677194c05c3b699b80de260374baa0b1e7"},
{file = "orderly_set-5.5.0.tar.gz", hash = "sha256:e87185c8e4d8afa64e7f8160ee2c542a475b738bc891dc3f58102e654125e6ce"},
@ -7178,9 +7204,10 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1
name = "pdf2image"
version = "1.17.0"
description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list."
optional = false
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2"},
{file = "pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57"},
@ -7193,9 +7220,10 @@ pillow = "*"
name = "pdfminer-six"
version = "20250506"
description = "PDF parser and analyzer"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "pdfminer_six-20250506-py3-none-any.whl", hash = "sha256:d81ad173f62e5f841b53a8ba63af1a4a355933cfc0ffabd608e568b9193909e3"},
{file = "pdfminer_six-20250506.tar.gz", hash = "sha256:b03cc8df09cf3c7aba8246deae52e0bca7ebb112a38895b5e1d4f5dd2b8ca2e7"},
@ -7327,9 +7355,10 @@ numpy = "*"
name = "pi-heif"
version = "1.1.0"
description = "Python interface for libheif library"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "pi_heif-1.1.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:888c195a097cfe8d03ef6c30a8d57d7ef21795b67d7ec79769c2707e2d919e32"},
{file = "pi_heif-1.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:f03ebfe71ab89b1e9d8d9976f306bb881e156d16ecb323ced9fce59a6ca46a20"},
@ -7397,9 +7426,10 @@ tests-min = ["defusedxml", "packaging", "pytest"]
name = "pikepdf"
version = "9.11.0"
description = "Read and write PDFs with Python, powered by qpdf"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "pikepdf-9.11.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:8ac1adbb2e32a1cefb9fc51f1e892de1ce0af506f040593384b3af973a46089b"},
{file = "pikepdf-9.11.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:f53ccda7be5aa7457a1b32b635a1e289dcdccb607b4fa7198a2c70e163fc0b8b"},
@ -7864,9 +7894,10 @@ files = [
name = "proto-plus"
version = "1.26.1"
description = "Beautiful, Pythonic protocol buffers"
optional = false
optional = true
python-versions = ">=3.7"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\""
files = [
{file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"},
{file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"},
@ -7903,9 +7934,10 @@ files = [
name = "psutil"
version = "7.0.0"
description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7."
optional = false
optional = true
python-versions = ">=3.6"
groups = ["main"]
markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\""
files = [
{file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"},
{file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"},
@ -8208,9 +8240,10 @@ test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"]
name = "pyasn1"
version = "0.6.1"
description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\""
files = [
{file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"},
{file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"},
@ -8220,9 +8253,10 @@ files = [
name = "pyasn1-modules"
version = "0.4.2"
description = "A collection of ASN.1-based protocols modules"
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\""
files = [
{file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"},
{file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"},
@ -8235,9 +8269,10 @@ pyasn1 = ">=0.6.1,<0.7.0"
name = "pycocotools"
version = "2.0.10"
description = "Official APIs for the MS-COCO dataset"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "pycocotools-2.0.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:94d558e6a4b92620dad1684b74b6c1404e20d5ed3b4f3aed64ad817d5dd46c72"},
{file = "pycocotools-2.0.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4d61959f505f1333afd1666ece1a9f8dad318de160c56c7d03f22d7b5556478"},
@ -8689,9 +8724,10 @@ image = ["Pillow (>=8.0.0)"]
name = "pypdfium2"
version = "4.30.0"
description = "Python bindings to PDFium"
optional = false
optional = true
python-versions = ">=3.6"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab"},
{file = "pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de"},
@ -8994,9 +9030,10 @@ cli = ["click (>=5.0)"]
name = "python-iso639"
version = "2025.2.18"
description = "ISO 639 language codes, names, and other associated information"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "python_iso639-2025.2.18-py3-none-any.whl", hash = "sha256:b2d471c37483a26f19248458b20e7bd96492e15368b01053b540126bcc23152f"},
{file = "python_iso639-2025.2.18.tar.gz", hash = "sha256:34e31e8e76eb3fc839629e257b12bcfd957c6edcbd486bbf66ba5185d1f566e8"},
@ -9025,9 +9062,10 @@ dev = ["backports.zoneinfo ; python_version < \"3.9\"", "black", "build", "freez
name = "python-magic"
version = "0.4.27"
description = "File type identification using libmagic"
optional = false
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"},
{file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"},
@ -9063,9 +9101,10 @@ files = [
name = "python-oxmsg"
version = "0.0.2"
description = "Extract attachments from Outlook .msg files."
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "python_oxmsg-0.0.2-py3-none-any.whl", hash = "sha256:22be29b14c46016bcd05e34abddfd8e05ee82082f53b82753d115da3fc7d0355"},
{file = "python_oxmsg-0.0.2.tar.gz", hash = "sha256:a6aff4deb1b5975d44d49dab1d9384089ffeec819e19c6940bc7ffbc84775fad"},
@ -9358,9 +9397,10 @@ files = [
name = "rapidfuzz"
version = "3.14.0"
description = "rapid fuzzy string matching"
optional = false
optional = true
python-versions = ">=3.10"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "rapidfuzz-3.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91d8c7d9d38835d5fcf9bc87593add864eaea41eb33654d93ded3006b198a326"},
{file = "rapidfuzz-3.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5a1e574230262956d28e40191dd44ad3d81d2d29b5e716c6c7c0ba17c4d1524e"},
@ -9664,9 +9704,10 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"]
name = "requests-toolbelt"
version = "1.0.0"
description = "A utility belt for advanced users of python-requests"
optional = false
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
groups = ["main"]
markers = "extra == \"neptune\" or extra == \"langchain\" or extra == \"docs\""
files = [
{file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"},
{file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"},
@ -9941,9 +9982,10 @@ files = [
name = "rsa"
version = "4.9.1"
description = "Pure-Python RSA implementation"
optional = false
optional = true
python-versions = "<4,>=3.6"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\""
files = [
{file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"},
{file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"},
@ -10028,9 +10070,10 @@ crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]
name = "safetensors"
version = "0.6.2"
description = ""
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"huggingface\" or extra == \"ollama\" or extra == \"codegraph\" or extra == \"docs\""
files = [
{file = "safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba"},
{file = "safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b"},
@ -10570,9 +10613,10 @@ files = [
name = "soupsieve"
version = "2.7"
description = "A modern CSS selector implementation for Beautiful Soup."
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\" or extra == \"evals\""
files = [
{file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"},
{file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"},
@ -10899,9 +10943,10 @@ blobfile = ["blobfile (>=2)"]
name = "timm"
version = "1.0.19"
description = "PyTorch Image Models"
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "timm-1.0.19-py3-none-any.whl", hash = "sha256:c07b56c32f3d3226c656f75c1b5479c08eb34eefed927c82fd8751a852f47931"},
{file = "timm-1.0.19.tar.gz", hash = "sha256:6e71e1f67ac80c229d3a78ca58347090514c508aeba8f2e2eb5289eda86e9f43"},
@ -11039,9 +11084,10 @@ files = [
name = "torch"
version = "2.8.0"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
optional = false
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905"},
{file = "torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:89aa9ee820bb39d4d72b794345cccef106b574508dd17dbec457949678c76011"},
@ -11102,9 +11148,10 @@ pyyaml = ["pyyaml"]
name = "torchvision"
version = "0.23.0"
description = "image and video datasets and models for torch deep learning"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "torchvision-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7266871daca00ad46d1c073e55d972179d12a58fa5c9adec9a3db9bbed71284a"},
{file = "torchvision-0.23.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:31c583ba27426a3a04eca8c05450524105c1564db41be6632f7536ef405a6de2"},
@ -11206,9 +11253,10 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,
name = "transformers"
version = "4.55.4"
description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
optional = false
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "extra == \"huggingface\" or extra == \"ollama\" or extra == \"codegraph\" or extra == \"docs\""
files = [
{file = "transformers-4.55.4-py3-none-any.whl", hash = "sha256:df28f3849665faba4af5106f0db4510323277c4bb595055340544f7e59d06458"},
{file = "transformers-4.55.4.tar.gz", hash = "sha256:574a30559bc273c7a4585599ff28ab6b676e96dc56ffd2025ecfce2fd0ab915d"},
@ -11346,10 +11394,10 @@ core = ["tree-sitter (>=0.22,<1.0)"]
name = "triton"
version = "3.4.0"
description = "A language and compiler for custom Deep Learning operations"
optional = false
optional = true
python-versions = "<3.14,>=3.9"
groups = ["main"]
markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\""
markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\""
files = [
{file = "triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128"},
{file = "triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467"},
@ -11463,9 +11511,10 @@ files = [
name = "typing-inspect"
version = "0.9.0"
description = "Runtime inspection utilities for typing module."
optional = false
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"llama-index\" or extra == \"docs\""
files = [
{file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"},
{file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"},
@ -11506,9 +11555,10 @@ files = [
name = "unstructured"
version = "0.18.14"
description = "A library that prepares raw documents for downstream ML tasks."
optional = false
optional = true
python-versions = ">=3.10.0"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "unstructured-0.18.14-py3-none-any.whl", hash = "sha256:cc6fadcf2f84fb6d910dd87bcbd54d8b6e2593ce29b851af167bc786d9263f73"},
{file = "unstructured-0.18.14.tar.gz", hash = "sha256:c23760dd38dd6eca924a2803a005318f4ddc8af6a69388a7ce9d5b7fbc4b51bf"},
@ -11582,9 +11632,10 @@ xlsx = ["msoffcrypto-tool", "networkx", "openpyxl", "pandas", "xlrd"]
name = "unstructured-client"
version = "0.25.9"
description = "Python Client SDK for Unstructured API"
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "unstructured-client-0.25.9.tar.gz", hash = "sha256:fcc461623f58fefb0e22508e28bf653a8f6934b9779cb4a90dd68d77a39fb5b2"},
{file = "unstructured_client-0.25.9-py3-none-any.whl", hash = "sha256:c984c01878c8fc243be7c842467d1113a194d885ab6396ae74258ee42717c5b5"},
@ -11619,9 +11670,10 @@ dev = ["pylint (==3.1.0)"]
name = "unstructured-inference"
version = "1.0.5"
description = "A library for performing inference using trained models."
optional = false
optional = true
python-versions = ">=3.7.0"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "unstructured_inference-1.0.5-py3-none-any.whl", hash = "sha256:ecbe385a6c58ca6b68b5723ed3cb540b70fd6317eecd1d5e6541516edf7071d0"},
{file = "unstructured_inference-1.0.5.tar.gz", hash = "sha256:ccd6881b0f03c533418bde6c9bd178a6660da8efbbe8c06a08afda9f25fe732b"},
@ -11649,9 +11701,10 @@ transformers = ">=4.25.1"
name = "unstructured-pytesseract"
version = "0.3.15"
description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR"
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"docs\""
files = [
{file = "unstructured.pytesseract-0.3.15-py3-none-any.whl", hash = "sha256:a3f505c5efb7ff9f10379051a7dd6aa624b3be6b0f023ed6767cc80d0b1613d1"},
{file = "unstructured.pytesseract-0.3.15.tar.gz", hash = "sha256:4b81bc76cfff4e2ef37b04863f0e48bd66184c0b39c3b2b4e017483bca1a7394"},
@ -11712,10 +11765,9 @@ zstd = ["zstandard (>=0.18.0)"]
name = "uvicorn"
version = "0.35.0"
description = "The lightning-fast ASGI server."
optional = true
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"api\" or extra == \"chromadb\""
files = [
{file = "uvicorn-0.35.0-py3-none-any.whl", hash = "sha256:197535216b25ff9b785e29a0b79199f55222193d47f820816e7da751e9bc8d4a"},
{file = "uvicorn-0.35.0.tar.gz", hash = "sha256:bc662f087f7cf2ce11a1d7fd70b90c9f98ef2e2831556dd078d131b96cc94a01"},
@ -12005,9 +12057,10 @@ files = [
name = "webencodings"
version = "0.5.1"
description = "Character encoding aliases for legacy web content"
optional = false
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\""
files = [
{file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
{file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
@ -12035,10 +12088,9 @@ test = ["websockets"]
name = "websockets"
version = "15.0.1"
description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
optional = true
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"api\" or extra == \"deepeval\" or extra == \"chromadb\""
files = [
{file = "websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b"},
{file = "websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205"},
@ -12543,7 +12595,7 @@ cffi = ["cffi (>=1.17) ; python_version >= \"3.13\" and platform_python_implemen
[extras]
anthropic = ["anthropic"]
api = ["gunicorn", "uvicorn", "websockets"]
api = []
aws = ["s3fs"]
chromadb = ["chromadb", "pypika"]
codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
@ -12573,4 +12625,4 @@ posthog = ["posthog"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<=3.13"
content-hash = "e62e318b0a25181a2d550fdb641a4f954f3d8434e76351f8a9009a64b53f0a52"
content-hash = "461ec32fa750cde5939bec812a08aa6ea2295fa2fc346ffdbffcf91711b02720"

View file

@ -42,7 +42,6 @@ dependencies = [
"aiofiles>=23.2.1,<24.0.0",
"rdflib>=7.1.4,<7.2.0",
"pypdf>=4.1.0,<7.0.0",
"unstructured[pdf]>=0.18.1,<19",
"jinja2>=3.1.3,<4",
"matplotlib>=3.8.3,<4",
"networkx>=3.4.2,<4",
@ -106,7 +105,7 @@ chromadb = [
"chromadb>=0.6,<0.7",
"pypika==0.48.9",
]
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.18.1,<19"]
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"]
codegraph = [
"fastembed<=0.6.0 ; python_version < '3.13'",
"transformers>=4.46.3,<5",

6
uv.lock generated
View file

@ -899,7 +899,6 @@ dependencies = [
{ name = "structlog" },
{ name = "tiktoken" },
{ name = "typing-extensions" },
{ name = "unstructured", extra = ["pdf"] },
{ name = "uvicorn" },
{ name = "websockets" },
]
@ -947,7 +946,7 @@ distributed = [
{ name = "modal" },
]
docs = [
{ name = "unstructured", extra = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"] },
{ name = "unstructured", extra = ["csv", "doc", "docx", "epub", "md", "odt", "org", "pdf", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"] },
]
evals = [
{ name = "gdown" },
@ -1101,8 +1100,7 @@ requires-dist = [
{ name = "tree-sitter-python", marker = "extra == 'codegraph'", specifier = ">=0.23.6,<0.24" },
{ name = "tweepy", marker = "extra == 'dev'", specifier = ">=4.14.0,<5.0.0" },
{ name = "typing-extensions", specifier = ">=4.12.2,<5.0.0" },
{ name = "unstructured", extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], marker = "extra == 'docs'", specifier = ">=0.18.1,<19" },
{ name = "unstructured", extras = ["pdf"], specifier = ">=0.18.1,<19" },
{ name = "unstructured", extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx", "pdf"], marker = "extra == 'docs'", specifier = ">=0.18.1,<19" },
{ name = "uvicorn", specifier = ">=0.34.0,<1.0.0" },
{ name = "websockets", specifier = ">=15.0.1,<16.0.0" },
]