diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index 3e479a53..f86bcea8 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -1081,11 +1081,11 @@ async def pipeline_enqueue_file( result = converter.convert(file_path) content = result.document.export_to_markdown() else: - if not pm.is_installed("pypdf2"): # type: ignore - pm.install("pypdf2") + if not pm.is_installed("pypdf"): # type: ignore + pm.install("pypdf") if not pm.is_installed("pycryptodome"): # type: ignore pm.install("pycryptodome") - from PyPDF2 import PdfReader # type: ignore + from pypdf import PdfReader # type: ignore from io import BytesIO pdf_file = BytesIO(file) diff --git a/pyproject.toml b/pyproject.toml index 29f7c58e..81e44aff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ offline-docs = [ # Document processing dependencies "openpyxl>=3.0.0,<4.0.0", "pycryptodome>=3.0.0,<4.0.0", - "pypdf2>=3.0.0", + "pypdf>=6.1.0", "python-docx>=0.8.11,<2.0.0", "python-pptx>=0.6.21,<2.0.0", ] diff --git a/requirements-offline-docs.txt b/requirements-offline-docs.txt index 14d782fd..12f02080 100644 --- a/requirements-offline-docs.txt +++ b/requirements-offline-docs.txt @@ -10,6 +10,6 @@ # Document processing dependencies (with version constraints matching pyproject.toml) openpyxl>=3.0.0,<4.0.0 pycryptodome>=3.0.0,<4.0.0 -pypdf2>=3.0.0 +pypdf>=6.1.0 python-docx>=0.8.11,<2.0.0 python-pptx>=0.6.21,<2.0.0 diff --git a/requirements-offline.txt b/requirements-offline.txt index 0582eaca..50848093 100644 --- a/requirements-offline.txt +++ b/requirements-offline.txt @@ -24,7 +24,7 @@ openpyxl>=3.0.0,<4.0.0 pycryptodome>=3.0.0,<4.0.0 pymilvus>=2.6.2,<3.0.0 pymongo>=4.0.0,<5.0.0 -pypdf2>=3.0.0 +pypdf>=6.1.0 python-docx>=0.8.11,<2.0.0 python-pptx>=0.6.21,<2.0.0 qdrant-client>=1.11.0,<2.0.0 diff --git a/uv.lock b/uv.lock index 63fa0a78..b942632f 100644 --- a/uv.lock +++ b/uv.lock @@ -1981,7 +1981,7 @@ offline = [ { name = "pycryptodome" }, { name = "pymilvus" }, { name = "pymongo" }, - { name = "pypdf2" }, + { name = "pypdf" }, { name = "python-docx" }, { name = "python-pptx" }, { name = "qdrant-client" }, @@ -1992,7 +1992,7 @@ offline = [ offline-docs = [ { name = "openpyxl" }, { name = "pycryptodome" }, - { name = "pypdf2" }, + { name = "pypdf" }, { name = "python-docx" }, { name = "python-pptx" }, ] @@ -2071,7 +2071,7 @@ requires-dist = [ { name = "pyjwt", marker = "extra == 'api'", specifier = ">=2.8.0,<3.0.0" }, { name = "pymilvus", marker = "extra == 'offline-storage'", specifier = ">=2.6.2,<3.0.0" }, { name = "pymongo", marker = "extra == 'offline-storage'", specifier = ">=4.0.0,<5.0.0" }, - { name = "pypdf2", marker = "extra == 'offline-docs'", specifier = ">=3.0.0" }, + { name = "pypdf", marker = "extra == 'offline-docs'", specifier = ">=6.1.0" }, { name = "pypinyin" }, { name = "pypinyin", marker = "extra == 'api'" }, { name = "pytest", marker = "extra == 'evaluation'", specifier = ">=8.4.2" }, @@ -3977,15 +3977,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/ed/494fd0cc1190a7c335e6958eeaee6f373a281869830255c2ed4785dac135/pypdf-6.1.3-py3-none-any.whl", hash = "sha256:eb049195e46f014fc155f566fa20e09d70d4646a9891164ac25fa0cbcfcdbcb5", size = 323863, upload-time = "2025-10-22T16:13:44.174Z" }, ] -[[package]] -name = "pypdf2" -version = "3.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9f/bb/18dc3062d37db6c491392007dfd1a7f524bb95886eb956569ac38a23a784/PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440", size = 227419, upload-time = "2022-12-31T10:36:13.13Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928", size = 232572, upload-time = "2022-12-31T10:36:10.327Z" }, -] - [[package]] name = "pypinyin" version = "0.55.0"