From 7fae75d020fbdf7156f668dc4d230103d6b1ac87 Mon Sep 17 00:00:00 2001 From: EricXiao Date: Mon, 22 Sep 2025 15:07:58 +0800 Subject: [PATCH] make advanced pdf loader optional Signed-off-by: EricXiao --- cognee/infrastructure/loaders/LoaderEngine.py | 6 +- .../loaders/external/__init__.py | 10 +- .../loaders/external/advanced_pdf_loader.py | 18 +- .../loaders/supported_loaders.py | 10 +- cognee/tests/test_advanced_pdf_loader.py | 27 +- poetry.lock | 238 +++++++++++------- pyproject.toml | 3 +- uv.lock | 6 +- 8 files changed, 178 insertions(+), 140 deletions(-) diff --git a/cognee/infrastructure/loaders/LoaderEngine.py b/cognee/infrastructure/loaders/LoaderEngine.py index 65d916f6f..d4be2aab5 100644 --- a/cognee/infrastructure/loaders/LoaderEngine.py +++ b/cognee/infrastructure/loaders/LoaderEngine.py @@ -87,7 +87,7 @@ class LoaderEngine: if loader.can_handle(extension=file_info.extension, mime_type=file_info.mime): return loader else: - raise ValueError(f"Loader does not exist: {loader_name}") + logger.info(f"Skipping {loader_name}: Preferred Loader not registered") # Try default priority order for loader_name in self.default_loader_priority: @@ -96,7 +96,9 @@ class LoaderEngine: if loader.can_handle(extension=file_info.extension, mime_type=file_info.mime): return loader else: - raise ValueError(f"Loader does not exist: {loader_name}") + logger.info( + f"Skipping {loader_name}: Loader not registered (in default priority list)." + ) return None diff --git a/cognee/infrastructure/loaders/external/__init__.py b/cognee/infrastructure/loaders/external/__init__.py index 9d9c32aaa..6bf9f9200 100644 --- a/cognee/infrastructure/loaders/external/__init__.py +++ b/cognee/infrastructure/loaders/external/__init__.py @@ -9,10 +9,9 @@ This module contains loaders that depend on external libraries: These loaders are optional and only available if their dependencies are installed. """ -from .advanced_pdf_loader import AdvancedPdfLoader from .pypdf_loader import PyPdfLoader -__all__ = ["AdvancedPdfLoader", "PyPdfLoader"] +__all__ = ["PyPdfLoader"] # Conditional imports based on dependency availability try: @@ -21,3 +20,10 @@ try: __all__.append("UnstructuredLoader") except ImportError: pass + +try: + from .advanced_pdf_loader import AdvancedPdfLoader + + __all__.append("AdvancedPdfLoader") +except ImportError: + pass diff --git a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py index 138fb3dab..7bab8cac6 100644 --- a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py +++ b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py @@ -14,6 +14,14 @@ from cognee.infrastructure.loaders.external.pypdf_loader import PyPdfLoader logger = get_logger(__name__) +try: + from unstructured.partition.pdf import partition_pdf +except ImportError as e: + logger.info( + "unstructured[pdf] not installed, can't use AdvancedPdfLoader, will use PyPdfLoader instead." + ) + raise ImportError from e + @dataclass class _PageBuffer: @@ -61,16 +69,6 @@ class AdvancedPdfLoader(LoaderInterface): LoaderResult with extracted text content and metadata """ - try: - from unstructured.partition.pdf import partition_pdf - - except ImportError: - logger.warning( - "unstructured[pdf] not installed, can't use AdvancedPdfLoader, using PyPDF fallback." - ) - - return await self._fallback(file_path, **kwargs) - try: logger.info(f"Processing PDF: {file_path}") diff --git a/cognee/infrastructure/loaders/supported_loaders.py b/cognee/infrastructure/loaders/supported_loaders.py index d71f53f11..d103babe3 100644 --- a/cognee/infrastructure/loaders/supported_loaders.py +++ b/cognee/infrastructure/loaders/supported_loaders.py @@ -1,9 +1,8 @@ -from cognee.infrastructure.loaders.external import AdvancedPdfLoader, PyPdfLoader +from cognee.infrastructure.loaders.external import PyPdfLoader from cognee.infrastructure.loaders.core import TextLoader, AudioLoader, ImageLoader # Registry for loader implementations supported_loaders = { - AdvancedPdfLoader.loader_name: AdvancedPdfLoader, PyPdfLoader.loader_name: PyPdfLoader, TextLoader.loader_name: TextLoader, ImageLoader.loader_name: ImageLoader, @@ -17,3 +16,10 @@ try: supported_loaders[UnstructuredLoader.loader_name] = UnstructuredLoader except ImportError: pass + +try: + from cognee.infrastructure.loaders.external import AdvancedPdfLoader + + supported_loaders[AdvancedPdfLoader.loader_name] = AdvancedPdfLoader +except ImportError: + pass diff --git a/cognee/tests/test_advanced_pdf_loader.py b/cognee/tests/test_advanced_pdf_loader.py index 0e79d44e8..8b799c12b 100644 --- a/cognee/tests/test_advanced_pdf_loader.py +++ b/cognee/tests/test_advanced_pdf_loader.py @@ -42,29 +42,6 @@ def test_can_handle(loader, extension, mime_type, expected): assert loader.can_handle(extension, mime_type) == expected -@pytest.mark.asyncio -@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader") -@patch( - "unstructured.partition.pdf.partition_pdf", - side_effect=ImportError("unstructured not installed"), -) -async def test_load_fallback_on_import_error(mock_partition_pdf, mock_pypdf_loader, loader): - """Test fallback to PyPdfLoader when unstructured is not installed""" - # Prepare Mock - mock_fallback_instance = MagicMock() - mock_fallback_instance.load = AsyncMock(return_value="/fake/path/fallback.txt") - mock_pypdf_loader.return_value = mock_fallback_instance - test_file_path = "/fake/path/to/document.pdf" - - # Run - result_path = await loader.load(test_file_path) - - # Assert - assert result_path == "/fake/path/fallback.txt" - mock_partition_pdf.assert_not_called() # partition_pdf should not be called - mock_fallback_instance.load.assert_awaited_once_with(test_file_path) - - @pytest.mark.asyncio @patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.open", new_callable=mock_open) @patch( @@ -74,7 +51,7 @@ async def test_load_fallback_on_import_error(mock_partition_pdf, mock_pypdf_load @patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.get_storage_config") @patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.get_file_storage") @patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader") -@patch("unstructured.partition.pdf.partition_pdf") +@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.partition_pdf") async def test_load_success_with_unstructured( mock_partition_pdf, mock_pypdf_loader, @@ -141,7 +118,7 @@ async def test_load_success_with_unstructured( ) @patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader") @patch( - "unstructured.partition.pdf.partition_pdf", + "cognee.infrastructure.loaders.external.advanced_pdf_loader.partition_pdf", side_effect=Exception("Unstructured failed!"), ) async def test_load_fallback_on_unstructured_exception( diff --git a/poetry.lock b/poetry.lock index 9ff46e2f4..25cfe6294 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4,9 +4,10 @@ name = "accelerate" version = "1.10.1" description = "Accelerate" -optional = false +optional = true python-versions = ">=3.9.0" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "accelerate-1.10.1-py3-none-any.whl", hash = "sha256:3621cff60b9a27ce798857ece05e2b9f56fcc71631cfb31ccf71f0359c311f11"}, {file = "accelerate-1.10.1.tar.gz", hash = "sha256:3dea89e433420e4bfac0369cae7e36dcd6a56adfcfd38cdda145c6225eab5df8"}, @@ -308,9 +309,10 @@ vertex = ["google-auth[requests] (>=2,<3)"] name = "antlr4-python3-runtime" version = "4.9.3" description = "ANTLR 4.9.3 runtime for Python 3.7" -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"}, ] @@ -758,9 +760,10 @@ typecheck = ["mypy"] name = "beautifulsoup4" version = "4.13.5" description = "Screen-scraping library" -optional = false +optional = true python-versions = ">=3.7.0" groups = ["main"] +markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\" or extra == \"evals\"" files = [ {file = "beautifulsoup4-4.13.5-py3-none-any.whl", hash = "sha256:642085eaa22233aceadff9c69651bc51e8bf3f874fb6d7104ece2beb24b47c4a"}, {file = "beautifulsoup4-4.13.5.tar.gz", hash = "sha256:5e70131382930e7c3de33450a2f54a63d5e4b19386eab43a5b34d594268f3695"}, @@ -865,9 +868,10 @@ virtualenv = ["virtualenv (>=20.11) ; python_version < \"3.10\"", "virtualenv (> name = "cachetools" version = "5.5.2" description = "Extensible memoizing collections and decorators" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, @@ -1582,9 +1586,10 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] name = "dataclasses-json" version = "0.6.7" description = "Easily serialize dataclasses to and from JSON." -optional = false +optional = true python-versions = "<4.0,>=3.7" groups = ["main"] +markers = "extra == \"llama-index\" or extra == \"docs\"" files = [ {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"}, {file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"}, @@ -1648,9 +1653,10 @@ files = [ name = "deepdiff" version = "8.6.0" description = "Deep Difference and Search of any Python object/data. Recreate objects by adding adding deltas to each other." -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "deepdiff-8.6.0-py3-none-any.whl", hash = "sha256:db80677a434ac1f84147fd1598e93f1beb06d467e107af45fcf77cf8a681169f"}, {file = "deepdiff-8.6.0.tar.gz", hash = "sha256:6197216c2d777c3106a9989055c230e25848e599b26dcbcdc66226bd8d7fe901"}, @@ -1984,9 +1990,10 @@ files = [ name = "effdet" version = "0.4.1" description = "EfficientDet for PyTorch" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "effdet-0.4.1-py3-none-any.whl", hash = "sha256:10889a226228d515c948e3fcf811e64c0d78d7aa94823a300045653b9c284cb7"}, {file = "effdet-0.4.1.tar.gz", hash = "sha256:ac5589fd304a5650c201986b2ef5f8e10c111093a71b1c49fa6b8817710812b5"}, @@ -2019,9 +2026,10 @@ idna = ">=2.0.0" name = "emoji" version = "2.14.1" description = "Emoji for Python" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "emoji-2.14.1-py3-none-any.whl", hash = "sha256:35a8a486c1460addb1499e3bf7929d3889b2e2841a57401903699fef595e942b"}, {file = "emoji-2.14.1.tar.gz", hash = "sha256:f8c50043d79a2c1410ebfae833ae1868d5941a67a6cd4d18377e2eb0bd79346b"}, @@ -2616,9 +2624,10 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 name = "google-api-core" version = "2.25.1" description = "Google API client core library" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\"" files = [ {file = "google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7"}, {file = "google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8"}, @@ -2672,9 +2681,10 @@ uritemplate = ">=3.0.1,<5" name = "google-auth" version = "2.40.3" description = "Google Authentication Library" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca"}, {file = "google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77"}, @@ -2716,9 +2726,10 @@ httplib2 = ">=0.19.0" name = "google-cloud-vision" version = "3.10.2" description = "Google Cloud Vision API client library" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "google_cloud_vision-3.10.2-py3-none-any.whl", hash = "sha256:42a17fbc2219b0a88e325e2c1df6664a8dafcbae66363fb37ebcb511b018fc87"}, {file = "google_cloud_vision-3.10.2.tar.gz", hash = "sha256:649380faab8933440b632bf88072c0c382a08d49ab02bc0b4fba821882ae1765"}, @@ -2788,9 +2799,10 @@ dev = ["Pillow", "absl-py", "black", "ipython", "nose2", "pandas", "pytype", "py name = "googleapis-common-protos" version = "1.70.0" description = "Common protobufs used in Google APIs" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"}, {file = "googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"}, @@ -2937,9 +2949,10 @@ aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.8)"] name = "grpcio" version = "1.74.0" description = "HTTP/2-based RPC framework" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "grpcio-1.74.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:85bd5cdf4ed7b2d6438871adf6afff9af7096486fcf51818a81b77ef4dd30907"}, {file = "grpcio-1.74.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:68c8ebcca945efff9d86d8d6d7bfb0841cf0071024417e2d7f45c5e46b5b08eb"}, @@ -3001,9 +3014,10 @@ protobuf = ["grpcio-tools (>=1.74.0)"] name = "grpcio-status" version = "1.71.2" description = "Status proto mapping for gRPC" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\"" files = [ {file = "grpcio_status-1.71.2-py3-none-any.whl", hash = "sha256:803c98cb6a8b7dc6dbb785b1111aed739f241ab5e9da0bba96888aa74704cfd3"}, {file = "grpcio_status-1.71.2.tar.gz", hash = "sha256:c7a97e176df71cdc2c179cd1847d7fc86cca5832ad12e9798d7fed6b7a1aab50"}, @@ -3038,10 +3052,9 @@ protobuf = ["protobuf (>=3.20.0)"] name = "gunicorn" version = "23.0.0" description = "WSGI HTTP Server for UNIX" -optional = true +optional = false python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"api\"" files = [ {file = "gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d"}, {file = "gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec"}, @@ -3142,9 +3155,10 @@ files = [ name = "html5lib" version = "1.1" description = "HTML parser based on the WHATWG HTML specification" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d"}, {file = "html5lib-1.1.tar.gz", hash = "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f"}, @@ -3897,9 +3911,10 @@ ply = "*" name = "jsonpath-python" version = "1.0.6" description = "A more powerful JSONPath implementation in modern python" -optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"}, {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"}, @@ -4478,9 +4493,10 @@ langchain-core = ">=0.3.72,<1.0.0" name = "langdetect" version = "1.0.9" description = "Language detection library ported from Google's language-detection." -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"}, {file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"}, @@ -4732,9 +4748,10 @@ dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; pytho name = "lxml" version = "6.0.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "lxml-6.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3b38e20c578149fdbba1fd3f36cb1928a3aaca4b011dfd41ba09d11fb396e1b9"}, {file = "lxml-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:11a052cbd013b7140bbbb38a14e2329b6192478344c99097e378c691b7119551"}, @@ -5008,9 +5025,10 @@ files = [ name = "marshmallow" version = "3.26.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"llama-index\" or extra == \"docs\"" files = [ {file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"}, {file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"}, @@ -5389,9 +5407,10 @@ mkdocstrings = ">=0.26" name = "ml-dtypes" version = "0.5.3" description = "ml_dtypes is a stand-alone implementation of several NumPy dtype extensions used in machine learning." -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "ml_dtypes-0.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0a1d68a7cb53e3f640b2b6a34d12c0542da3dd935e560fdf463c0c77f339fc20"}, {file = "ml_dtypes-0.5.3-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cd5a6c711b5350f3cbc2ac28def81cd1c580075ccb7955e61e9d8f4bfd40d24"}, @@ -5846,9 +5865,10 @@ reports = ["lxml"] name = "mypy-extensions" version = "1.1.0" description = "Type system extensions for programs checked with the mypy type checker." -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"llama-index\" or extra == \"docs\" or extra == \"dev\"" files = [ {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, @@ -5989,9 +6009,10 @@ pyarrow = ["pyarrow (>=1.0.0)"] name = "nest-asyncio" version = "1.6.0" description = "Patch asyncio to allow nested event loops" -optional = false +optional = true python-versions = ">=3.5" groups = ["main"] +markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"docs\"" files = [ {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, @@ -6258,10 +6279,10 @@ files = [ name = "nvidia-cublas-cu12" version = "12.8.4.1" description = "CUBLAS native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0"}, {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142"}, @@ -6272,10 +6293,10 @@ files = [ name = "nvidia-cuda-cupti-cu12" version = "12.8.90" description = "CUDA profiling tools runtime libs." -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed"}, {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182"}, @@ -6286,10 +6307,10 @@ files = [ name = "nvidia-cuda-nvrtc-cu12" version = "12.8.93" description = "NVRTC native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994"}, {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8"}, @@ -6300,10 +6321,10 @@ files = [ name = "nvidia-cuda-runtime-cu12" version = "12.8.90" description = "CUDA Runtime native Libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d"}, {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90"}, @@ -6314,10 +6335,10 @@ files = [ name = "nvidia-cudnn-cu12" version = "9.10.2.21" description = "cuDNN runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8"}, {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8"}, @@ -6331,10 +6352,10 @@ nvidia-cublas-cu12 = "*" name = "nvidia-cufft-cu12" version = "11.3.3.83" description = "CUFFT native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a"}, {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74"}, @@ -6348,10 +6369,10 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-cufile-cu12" version = "1.13.1.3" description = "cuFile GPUDirect libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc"}, {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a"}, @@ -6361,10 +6382,10 @@ files = [ name = "nvidia-curand-cu12" version = "10.3.9.90" description = "CURAND native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd"}, {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9"}, @@ -6375,10 +6396,10 @@ files = [ name = "nvidia-cusolver-cu12" version = "11.7.3.90" description = "CUDA solver native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0"}, {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450"}, @@ -6394,10 +6415,10 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-cusparse-cu12" version = "12.5.8.93" description = "CUSPARSE native runtime libraries" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc"}, {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b"}, @@ -6411,10 +6432,10 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-cusparselt-cu12" version = "0.7.1" description = "NVIDIA cuSPARSELt" -optional = false +optional = true python-versions = "*" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5"}, {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623"}, @@ -6425,10 +6446,10 @@ files = [ name = "nvidia-nccl-cu12" version = "2.27.3" description = "NVIDIA Collective Communication Library (NCCL) Runtime" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ddf1a245abc36c550870f26d537a9b6087fb2e2e3d6e0ef03374c6fd19d984f"}, {file = "nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039"}, @@ -6438,10 +6459,10 @@ files = [ name = "nvidia-nvjitlink-cu12" version = "12.8.93" description = "Nvidia JIT LTO Library" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88"}, {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7"}, @@ -6452,10 +6473,10 @@ files = [ name = "nvidia-nvtx-cu12" version = "12.8.90" description = "NVIDIA Tools Extension" -optional = false +optional = true python-versions = ">=3" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615"}, {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f"}, @@ -6484,9 +6505,10 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] name = "olefile" version = "0.47" description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f"}, {file = "olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c"}, @@ -6516,9 +6538,10 @@ pydantic = ">=2.9" name = "omegaconf" version = "2.3.0" description = "A flexible configuration library" -optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"}, {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"}, @@ -6532,9 +6555,10 @@ PyYAML = ">=5.1.0" name = "onnx" version = "1.19.0" description = "Open Neural Network Exchange" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "onnx-1.19.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:e927d745939d590f164e43c5aec7338c5a75855a15130ee795f492fc3a0fa565"}, {file = "onnx-1.19.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c6cdcb237c5c4202463bac50417c5a7f7092997a8469e8b7ffcd09f51de0f4a9"}, @@ -6649,9 +6673,10 @@ voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"] name = "opencv-python" version = "4.11.0.86" description = "Wrapper package for OpenCV python bindings." -optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "opencv-python-4.11.0.86.tar.gz", hash = "sha256:03d60ccae62304860d232272e4a4fda93c39d595780cb40b161b310244b736a4"}, {file = "opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:432f67c223f1dc2824f5e73cdfcd9db0efc8710647d4e813012195dc9122a52a"}, @@ -6877,9 +6902,10 @@ files = [ name = "orderly-set" version = "5.5.0" description = "Orderly set" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "orderly_set-5.5.0-py3-none-any.whl", hash = "sha256:46f0b801948e98f427b412fcabb831677194c05c3b699b80de260374baa0b1e7"}, {file = "orderly_set-5.5.0.tar.gz", hash = "sha256:e87185c8e4d8afa64e7f8160ee2c542a475b738bc891dc3f58102e654125e6ce"}, @@ -7178,9 +7204,10 @@ test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1 name = "pdf2image" version = "1.17.0" description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list." -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2"}, {file = "pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57"}, @@ -7193,9 +7220,10 @@ pillow = "*" name = "pdfminer-six" version = "20250506" description = "PDF parser and analyzer" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "pdfminer_six-20250506-py3-none-any.whl", hash = "sha256:d81ad173f62e5f841b53a8ba63af1a4a355933cfc0ffabd608e568b9193909e3"}, {file = "pdfminer_six-20250506.tar.gz", hash = "sha256:b03cc8df09cf3c7aba8246deae52e0bca7ebb112a38895b5e1d4f5dd2b8ca2e7"}, @@ -7327,9 +7355,10 @@ numpy = "*" name = "pi-heif" version = "1.1.0" description = "Python interface for libheif library" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "pi_heif-1.1.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:888c195a097cfe8d03ef6c30a8d57d7ef21795b67d7ec79769c2707e2d919e32"}, {file = "pi_heif-1.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:f03ebfe71ab89b1e9d8d9976f306bb881e156d16ecb323ced9fce59a6ca46a20"}, @@ -7397,9 +7426,10 @@ tests-min = ["defusedxml", "packaging", "pytest"] name = "pikepdf" version = "9.11.0" description = "Read and write PDFs with Python, powered by qpdf" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "pikepdf-9.11.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:8ac1adbb2e32a1cefb9fc51f1e892de1ce0af506f040593384b3af973a46089b"}, {file = "pikepdf-9.11.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:f53ccda7be5aa7457a1b32b635a1e289dcdccb607b4fa7198a2c70e163fc0b8b"}, @@ -7864,9 +7894,10 @@ files = [ name = "proto-plus" version = "1.26.1" description = "Beautiful, Pythonic protocol buffers" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\"" files = [ {file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"}, {file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"}, @@ -7903,9 +7934,10 @@ files = [ name = "psutil" version = "7.0.0" description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." -optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\"" files = [ {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, @@ -8208,9 +8240,10 @@ test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] name = "pyasn1" version = "0.6.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -8220,9 +8253,10 @@ files = [ name = "pyasn1-modules" version = "0.4.2" description = "A collection of ASN.1-based protocols modules" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, @@ -8235,9 +8269,10 @@ pyasn1 = ">=0.6.1,<0.7.0" name = "pycocotools" version = "2.0.10" description = "Official APIs for the MS-COCO dataset" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "pycocotools-2.0.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:94d558e6a4b92620dad1684b74b6c1404e20d5ed3b4f3aed64ad817d5dd46c72"}, {file = "pycocotools-2.0.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4d61959f505f1333afd1666ece1a9f8dad318de160c56c7d03f22d7b5556478"}, @@ -8689,9 +8724,10 @@ image = ["Pillow (>=8.0.0)"] name = "pypdfium2" version = "4.30.0" description = "Python bindings to PDFium" -optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab"}, {file = "pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de"}, @@ -8994,9 +9030,10 @@ cli = ["click (>=5.0)"] name = "python-iso639" version = "2025.2.18" description = "ISO 639 language codes, names, and other associated information" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "python_iso639-2025.2.18-py3-none-any.whl", hash = "sha256:b2d471c37483a26f19248458b20e7bd96492e15368b01053b540126bcc23152f"}, {file = "python_iso639-2025.2.18.tar.gz", hash = "sha256:34e31e8e76eb3fc839629e257b12bcfd957c6edcbd486bbf66ba5185d1f566e8"}, @@ -9025,9 +9062,10 @@ dev = ["backports.zoneinfo ; python_version < \"3.9\"", "black", "build", "freez name = "python-magic" version = "0.4.27" description = "File type identification using libmagic" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"}, {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"}, @@ -9063,9 +9101,10 @@ files = [ name = "python-oxmsg" version = "0.0.2" description = "Extract attachments from Outlook .msg files." -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "python_oxmsg-0.0.2-py3-none-any.whl", hash = "sha256:22be29b14c46016bcd05e34abddfd8e05ee82082f53b82753d115da3fc7d0355"}, {file = "python_oxmsg-0.0.2.tar.gz", hash = "sha256:a6aff4deb1b5975d44d49dab1d9384089ffeec819e19c6940bc7ffbc84775fad"}, @@ -9358,9 +9397,10 @@ files = [ name = "rapidfuzz" version = "3.14.0" description = "rapid fuzzy string matching" -optional = false +optional = true python-versions = ">=3.10" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "rapidfuzz-3.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91d8c7d9d38835d5fcf9bc87593add864eaea41eb33654d93ded3006b198a326"}, {file = "rapidfuzz-3.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5a1e574230262956d28e40191dd44ad3d81d2d29b5e716c6c7c0ba17c4d1524e"}, @@ -9664,9 +9704,10 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"] name = "requests-toolbelt" version = "1.0.0" description = "A utility belt for advanced users of python-requests" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" groups = ["main"] +markers = "extra == \"neptune\" or extra == \"langchain\" or extra == \"docs\"" files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, @@ -9941,9 +9982,10 @@ files = [ name = "rsa" version = "4.9.1" description = "Pure-Python RSA implementation" -optional = false +optional = true python-versions = "<4,>=3.6" groups = ["main"] +markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, @@ -10028,9 +10070,10 @@ crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"] name = "safetensors" version = "0.6.2" description = "" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"huggingface\" or extra == \"ollama\" or extra == \"codegraph\" or extra == \"docs\"" files = [ {file = "safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba"}, {file = "safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b"}, @@ -10570,9 +10613,10 @@ files = [ name = "soupsieve" version = "2.7" description = "A modern CSS selector implementation for Beautiful Soup." -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\" or extra == \"evals\"" files = [ {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"}, {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"}, @@ -10899,9 +10943,10 @@ blobfile = ["blobfile (>=2)"] name = "timm" version = "1.0.19" description = "PyTorch Image Models" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "timm-1.0.19-py3-none-any.whl", hash = "sha256:c07b56c32f3d3226c656f75c1b5479c08eb34eefed927c82fd8751a852f47931"}, {file = "timm-1.0.19.tar.gz", hash = "sha256:6e71e1f67ac80c229d3a78ca58347090514c508aeba8f2e2eb5289eda86e9f43"}, @@ -11039,9 +11084,10 @@ files = [ name = "torch" version = "2.8.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = false +optional = true python-versions = ">=3.9.0" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905"}, {file = "torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:89aa9ee820bb39d4d72b794345cccef106b574508dd17dbec457949678c76011"}, @@ -11102,9 +11148,10 @@ pyyaml = ["pyyaml"] name = "torchvision" version = "0.23.0" description = "image and video datasets and models for torch deep learning" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "torchvision-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7266871daca00ad46d1c073e55d972179d12a58fa5c9adec9a3db9bbed71284a"}, {file = "torchvision-0.23.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:31c583ba27426a3a04eca8c05450524105c1564db41be6632f7536ef405a6de2"}, @@ -11206,9 +11253,10 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, name = "transformers" version = "4.55.4" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" -optional = false +optional = true python-versions = ">=3.9.0" groups = ["main"] +markers = "extra == \"huggingface\" or extra == \"ollama\" or extra == \"codegraph\" or extra == \"docs\"" files = [ {file = "transformers-4.55.4-py3-none-any.whl", hash = "sha256:df28f3849665faba4af5106f0db4510323277c4bb595055340544f7e59d06458"}, {file = "transformers-4.55.4.tar.gz", hash = "sha256:574a30559bc273c7a4585599ff28ab6b676e96dc56ffd2025ecfce2fd0ab915d"}, @@ -11346,10 +11394,10 @@ core = ["tree-sitter (>=0.22,<1.0)"] name = "triton" version = "3.4.0" description = "A language and compiler for custom Deep Learning operations" -optional = false +optional = true python-versions = "<3.14,>=3.9" groups = ["main"] -markers = "platform_machine == \"x86_64\" and platform_system == \"Linux\"" +markers = "platform_machine == \"x86_64\" and extra == \"docs\" and platform_system == \"Linux\"" files = [ {file = "triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128"}, {file = "triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467"}, @@ -11463,9 +11511,10 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"llama-index\" or extra == \"docs\"" files = [ {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, @@ -11506,9 +11555,10 @@ files = [ name = "unstructured" version = "0.18.14" description = "A library that prepares raw documents for downstream ML tasks." -optional = false +optional = true python-versions = ">=3.10.0" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "unstructured-0.18.14-py3-none-any.whl", hash = "sha256:cc6fadcf2f84fb6d910dd87bcbd54d8b6e2593ce29b851af167bc786d9263f73"}, {file = "unstructured-0.18.14.tar.gz", hash = "sha256:c23760dd38dd6eca924a2803a005318f4ddc8af6a69388a7ce9d5b7fbc4b51bf"}, @@ -11582,9 +11632,10 @@ xlsx = ["msoffcrypto-tool", "networkx", "openpyxl", "pandas", "xlrd"] name = "unstructured-client" version = "0.25.9" description = "Python Client SDK for Unstructured API" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "unstructured-client-0.25.9.tar.gz", hash = "sha256:fcc461623f58fefb0e22508e28bf653a8f6934b9779cb4a90dd68d77a39fb5b2"}, {file = "unstructured_client-0.25.9-py3-none-any.whl", hash = "sha256:c984c01878c8fc243be7c842467d1113a194d885ab6396ae74258ee42717c5b5"}, @@ -11619,9 +11670,10 @@ dev = ["pylint (==3.1.0)"] name = "unstructured-inference" version = "1.0.5" description = "A library for performing inference using trained models." -optional = false +optional = true python-versions = ">=3.7.0" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "unstructured_inference-1.0.5-py3-none-any.whl", hash = "sha256:ecbe385a6c58ca6b68b5723ed3cb540b70fd6317eecd1d5e6541516edf7071d0"}, {file = "unstructured_inference-1.0.5.tar.gz", hash = "sha256:ccd6881b0f03c533418bde6c9bd178a6660da8efbbe8c06a08afda9f25fe732b"}, @@ -11649,9 +11701,10 @@ transformers = ">=4.25.1" name = "unstructured-pytesseract" version = "0.3.15" description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"docs\"" files = [ {file = "unstructured.pytesseract-0.3.15-py3-none-any.whl", hash = "sha256:a3f505c5efb7ff9f10379051a7dd6aa624b3be6b0f023ed6767cc80d0b1613d1"}, {file = "unstructured.pytesseract-0.3.15.tar.gz", hash = "sha256:4b81bc76cfff4e2ef37b04863f0e48bd66184c0b39c3b2b4e017483bca1a7394"}, @@ -11712,10 +11765,9 @@ zstd = ["zstandard (>=0.18.0)"] name = "uvicorn" version = "0.35.0" description = "The lightning-fast ASGI server." -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"api\" or extra == \"chromadb\"" files = [ {file = "uvicorn-0.35.0-py3-none-any.whl", hash = "sha256:197535216b25ff9b785e29a0b79199f55222193d47f820816e7da751e9bc8d4a"}, {file = "uvicorn-0.35.0.tar.gz", hash = "sha256:bc662f087f7cf2ce11a1d7fd70b90c9f98ef2e2831556dd078d131b96cc94a01"}, @@ -12005,9 +12057,10 @@ files = [ name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" -optional = false +optional = true python-versions = "*" groups = ["main"] +markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"docs\"" files = [ {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, @@ -12035,10 +12088,9 @@ test = ["websockets"] name = "websockets" version = "15.0.1" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"api\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b"}, {file = "websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205"}, @@ -12543,7 +12595,7 @@ cffi = ["cffi (>=1.17) ; python_version >= \"3.13\" and platform_python_implemen [extras] anthropic = ["anthropic"] -api = ["gunicorn", "uvicorn", "websockets"] +api = [] aws = ["s3fs"] chromadb = ["chromadb", "pypika"] codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"] @@ -12573,4 +12625,4 @@ posthog = ["posthog"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "e62e318b0a25181a2d550fdb641a4f954f3d8434e76351f8a9009a64b53f0a52" +content-hash = "461ec32fa750cde5939bec812a08aa6ea2295fa2fc346ffdbffcf91711b02720" diff --git a/pyproject.toml b/pyproject.toml index c9c67a870..6dfd62622 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,6 @@ dependencies = [ "aiofiles>=23.2.1,<24.0.0", "rdflib>=7.1.4,<7.2.0", "pypdf>=4.1.0,<7.0.0", - "unstructured[pdf]>=0.18.1,<19", "jinja2>=3.1.3,<4", "matplotlib>=3.8.3,<4", "networkx>=3.4.2,<4", @@ -106,7 +105,7 @@ chromadb = [ "chromadb>=0.6,<0.7", "pypika==0.48.9", ] -docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.18.1,<19"] +docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"] codegraph = [ "fastembed<=0.6.0 ; python_version < '3.13'", "transformers>=4.46.3,<5", diff --git a/uv.lock b/uv.lock index 7a26ee96a..305bd158f 100644 --- a/uv.lock +++ b/uv.lock @@ -899,7 +899,6 @@ dependencies = [ { name = "structlog" }, { name = "tiktoken" }, { name = "typing-extensions" }, - { name = "unstructured", extra = ["pdf"] }, { name = "uvicorn" }, { name = "websockets" }, ] @@ -947,7 +946,7 @@ distributed = [ { name = "modal" }, ] docs = [ - { name = "unstructured", extra = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"] }, + { name = "unstructured", extra = ["csv", "doc", "docx", "epub", "md", "odt", "org", "pdf", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"] }, ] evals = [ { name = "gdown" }, @@ -1101,8 +1100,7 @@ requires-dist = [ { name = "tree-sitter-python", marker = "extra == 'codegraph'", specifier = ">=0.23.6,<0.24" }, { name = "tweepy", marker = "extra == 'dev'", specifier = ">=4.14.0,<5.0.0" }, { name = "typing-extensions", specifier = ">=4.12.2,<5.0.0" }, - { name = "unstructured", extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], marker = "extra == 'docs'", specifier = ">=0.18.1,<19" }, - { name = "unstructured", extras = ["pdf"], specifier = ">=0.18.1,<19" }, + { name = "unstructured", extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx", "pdf"], marker = "extra == 'docs'", specifier = ">=0.18.1,<19" }, { name = "uvicorn", specifier = ">=0.34.0,<1.0.0" }, { name = "websockets", specifier = ">=15.0.1,<16.0.0" }, ]