From 4d0654b2bc90108c7da87151c229c3c8f5a973a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20MANSUY?= Date: Thu, 4 Dec 2025 19:18:35 +0800 Subject: [PATCH] cherry-pick 433ec813 --- Dockerfile.offline | 19 +++- constraints-offline.txt | 160 +++++++++++++++++++++++++++++++ pyproject.toml | 22 +++-- requirements-offline-docs.txt | 4 +- requirements-offline-llm.txt | 6 +- requirements-offline-storage.txt | 18 ++-- requirements-offline.txt | 26 ++--- 7 files changed, 216 insertions(+), 39 deletions(-) create mode 100644 constraints-offline.txt diff --git a/Dockerfile.offline b/Dockerfile.offline index b2bd5d58..bad17908 100644 --- a/Dockerfile.offline +++ b/Dockerfile.offline @@ -34,14 +34,19 @@ ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}" COPY pyproject.toml . COPY setup.py . COPY requirements-offline*.txt ./ +COPY constraints-offline.txt . COPY lightrag/ ./lightrag/ # Include pre-built frontend assets from the previous stage COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui -# Install LightRAG with API extras and all offline dependencies -RUN pip install --user --no-cache-dir --use-pep517 .[api] -RUN pip install --user --no-cache-dir -r requirements-offline.txt +# Install LightRAG with API extras and all offline dependencies in a single step +# This prevents version conflicts from multiple installation passes +# Use constraints file for reproducible builds with exact versions +RUN pip install --user --no-cache-dir --use-pep517 \ + --upgrade-strategy=only-if-needed \ + --constraint constraints-offline.txt \ + .[api] -r requirements-offline.txt # Prepare offline cache directory and pre-populate tiktoken data RUN mkdir -p /app/data/tiktoken \ @@ -61,14 +66,18 @@ COPY --from=builder /app/lightrag ./lightrag COPY pyproject.toml . COPY setup.py . COPY requirements-offline*.txt ./ +COPY constraints-offline.txt . # Ensure the installed scripts are on PATH ENV PATH=/root/.local/bin:$PATH # Install editable package for runtime (re-using cached wheels) and verify extras # IMPORTANT: Must be done BEFORE creating data/ directory to avoid setuptools error -RUN pip install --no-cache-dir --use-pep517 ".[api]" -RUN pip install --no-cache-dir -r requirements-offline.txt +# Use single installation to prevent version conflicts with exact version constraints +RUN pip install --no-cache-dir --use-pep517 \ + --upgrade-strategy=only-if-needed \ + --constraint constraints-offline.txt \ + ".[api]" -r requirements-offline.txt # Create persistent data directories AFTER package installation RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken diff --git a/constraints-offline.txt b/constraints-offline.txt new file mode 100644 index 00000000..a9845611 --- /dev/null +++ b/constraints-offline.txt @@ -0,0 +1,160 @@ +# Exact version constraints based on successful local installation with uv pip install ".[offline]" +# Generated: 2025-10-15 +# Use with: pip install --constraint constraints-offline.txt -r requirements-offline.txt + +# Core ML/AI packages +torch==2.2.2 +transformers==4.57.1 +tokenizers==0.22.1 +torchvision==0.17.2 +huggingface-hub==0.35.3 +safetensors==0.6.2 +accelerate==1.10.1 + +# LLM Provider packages +openai==1.109.1 +anthropic==0.69.0 +ollama==0.6.0 +zhipuai==2.1.5.20250825 +voyageai==0.3.5 + +# AWS/Boto packages +aioboto3==15.2.0 +aiobotocore==2.24.2 +boto3==1.40.18 +botocore==1.40.18 + +# LlamaIndex packages +llama-index==0.14.4 +llama-index-core==0.14.4 +llama-index-cli==0.5.3 +llama-index-embeddings-openai==0.5.1 +llama-index-indices-managed-llama-cloud==0.9.4 +llama-index-instrumentation==0.4.2 +llama-index-llms-openai==0.6.4 +llama-index-readers-file==0.5.4 +llama-index-readers-llama-parse==0.5.1 +llama-index-workflows==2.8.0 +llama-cloud==0.1.35 +llama-cloud-services==0.6.54 +llama-parse==0.6.54 + +# Document processing packages +docling==2.57.0 +docling-core==2.48.4 +docling-ibm-models==3.9.1 +docling-parse==4.5.0 +pypdf==6.1.1 +pypdfium2==4.30.0 +python-docx==1.2.0 +python-pptx==1.0.2 +openpyxl==3.1.5 +lxml==5.4.0 + +# Storage backends +redis==6.4.0 +neo4j==6.0.2 +pymilvus==2.6.2 +pymongo==4.15.3 +asyncpg +qdrant-client==1.15.1 + +# Data processing +pandas==2.2.3 +numpy +scipy==1.16.2 + +# Computer vision +opencv-python==4.11.0.86 +opencv-python-headless==4.11.0.86 +pillow==11.3.0 + +# NLP packages +nltk==3.9.2 +langchain-core==0.3.79 +langchain-text-splitters==0.3.11 +langsmith==0.4.35 + +# Authentication +pyjwt==2.8.0 + +# Utility packages +aioitertools==0.12.0 +aiolimiter==1.2.1 +aiosqlite==0.21.0 +beautifulsoup4==4.14.2 +cachetools==6.2.1 +colorama==0.4.6 +colorlog==6.9.0 +dataclasses-json==0.6.7 +defusedxml==0.7.1 +deprecated==1.2.18 +dill==0.4.0 +dirtyjson==1.0.8 +dnspython==2.8.0 +docstring-parser==0.17.0 +faker==37.11.0 +filetype==1.2.0 +fsspec==2025.9.0 +greenlet==3.2.4 +griffe==1.14.0 +grpcio==1.75.1 +h2==4.3.0 +hpack==4.1.0 +hyperframe==6.1.0 +jinja2==3.1.6 +jmespath==1.0.1 +joblib==1.5.2 +jsonlines==3.1.0 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonref==1.1.0 +jsonschema==4.25.1 +jsonschema-specifications==2025.9.1 +latex2mathml==3.78.1 +markdown-it-py==4.0.0 +marko==2.2.1 +markupsafe==3.0.3 +marshmallow==3.26.1 +mdurl==0.1.2 +mpire==2.10.2 +mpmath==1.3.0 +multiprocess==0.70.18 +mypy-extensions==1.1.1 +nest-asyncio==1.6.0 +omegaconf==2.3.0 +orjson==3.11.3 +pluggy==1.6.0 +polyfactory==2.22.2 +portalocker==3.2.0 +protobuf==6.32.1 +pyclipper==1.3.0.post6 +pydantic-settings==2.11.0 +pygments==2.19.2 +pylatexenc==2.10 +rapidocr==3.4.2 +referencing==0.37.0 +requests-toolbelt==1.0.0 +rich==14.2.0 +rpds-py==0.27.1 +rtree==1.4.1 +s3transfer==0.13.1 +semchunk==2.2.2 +shapely==2.1.2 +shellingham==1.5.4 +soupsieve==2.8 +sqlalchemy==2.0.44 +striprtf==0.0.26 +sympy==1.14.0 +tabulate==0.9.0 +typer==0.19.2 +typing-inspect==0.9.0 +ujson==5.11.0 +wrapt==1.17.3 +zstandard==0.25.0 +antlr4-python3-runtime==4.9.3 + +# Special packages (platform-specific) +# Note: These may not be available on all platforms +# ocrmac==1.0.0 # macOS only +# pyobjc-* packages are macOS only diff --git a/pyproject.toml b/pyproject.toml index d9ef50a9..41b7b2c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,12 +23,13 @@ classifiers = [ dependencies = [ "aiohttp", "configparser", + "dotenv", "future", "json_repair", "nano-vectordb", "networkx", "numpy", - "pandas>=2.0.0,<2.4.0", + "pandas>=2.0.0,<2.3.0", "pipmaster", "pydantic", "pypinyin", @@ -44,13 +45,14 @@ api = [ # Core dependencies "aiohttp", "configparser", + "dotenv", "future", "json_repair", "nano-vectordb", "networkx", "numpy", - "openai>=1.0.0,<3.0.0", - "pandas>=2.0.0,<2.4.0", + "openai>=1.0.0,<2.0.0", + "pandas>=2.0.0,<2.3.0", "pipmaster", "pydantic", "pypinyin", @@ -80,16 +82,16 @@ api = [ # Offline deployment dependencies (layered design for flexibility) offline-docs = [ # Document processing dependencies - "openpyxl>=3.0.0,<4.0.0", - "pycryptodome>=3.0.0,<4.0.0", - "pypdf>=6.1.0", + "docling>=1.0.0,<3.0.0", + "pypdf2>=3.0.0", "python-docx>=0.8.11,<2.0.0", "python-pptx>=0.6.21,<2.0.0", + "openpyxl>=3.0.0,<4.0.0", ] offline-storage = [ # Storage backend dependencies - "redis>=5.0.0,<8.0.0", + "redis>=5.0.0,<7.0.0", "neo4j>=5.0.0,<7.0.0", "pymilvus>=2.6.2,<3.0.0", "pymongo>=4.0.0,<5.0.0", @@ -99,13 +101,15 @@ offline-storage = [ offline-llm = [ # LLM provider dependencies - "openai>=1.0.0,<3.0.0", + "openai>=1.0.0,<2.0.0", "anthropic>=0.18.0,<1.0.0", "ollama>=0.1.0,<1.0.0", "zhipuai>=2.0.0,<3.0.0", "aioboto3>=12.0.0,<16.0.0", "voyageai>=0.2.0,<1.0.0", "llama-index>=0.9.0,<1.0.0", + "transformers>=4.30.0,<5.0.0", + "torch>=2.0.0,<2.3.0", ] offline = [ @@ -135,7 +139,7 @@ include-package-data = true version = {attr = "lightrag.__version__"} [tool.setuptools.package-data] -lightrag = ["api/webui/**/*", "api/static/**/*"] +lightrag = ["api/webui/**/*"] [tool.ruff] target-version = "py310" diff --git a/requirements-offline-docs.txt b/requirements-offline-docs.txt index 12f02080..2dd6095d 100644 --- a/requirements-offline-docs.txt +++ b/requirements-offline-docs.txt @@ -8,8 +8,8 @@ # Or use constraints: pip install --constraint constraints-offline.txt -r requirements-offline-docs.txt # Document processing dependencies (with version constraints matching pyproject.toml) +docling>=1.0.0,<3.0.0 openpyxl>=3.0.0,<4.0.0 -pycryptodome>=3.0.0,<4.0.0 -pypdf>=6.1.0 +pypdf2>=3.0.0 python-docx>=0.8.11,<2.0.0 python-pptx>=0.6.21,<2.0.0 diff --git a/requirements-offline-llm.txt b/requirements-offline-llm.txt index bcfb1451..f5d6d39b 100644 --- a/requirements-offline-llm.txt +++ b/requirements-offline-llm.txt @@ -10,10 +10,10 @@ # LLM provider dependencies (with version constraints matching pyproject.toml) aioboto3>=12.0.0,<16.0.0 anthropic>=0.18.0,<1.0.0 -google-api-core>=2.0.0,<3.0.0 -google-genai>=1.0.0,<2.0.0 llama-index>=0.9.0,<1.0.0 ollama>=0.1.0,<1.0.0 -openai>=2.0.0,<3.0.0 +openai>=1.0.0,<2.0.0 +torch>=2.0.0,<2.3.0 +transformers>=4.30.0,<5.0.0 voyageai>=0.2.0,<1.0.0 zhipuai>=2.0.0,<3.0.0 diff --git a/requirements-offline-storage.txt b/requirements-offline-storage.txt index 5beb5d44..e08f85a9 100644 --- a/requirements-offline-storage.txt +++ b/requirements-offline-storage.txt @@ -3,12 +3,14 @@ # For offline installation: # pip download -r requirements-offline-storage.txt -d ./packages # pip install --no-index --find-links=./packages -r requirements-offline-storage.txt +# +# Recommended: Use pip install lightrag-hku[offline-storage] for the same effect +# Or use constraints: pip install --constraint constraints-offline.txt -r requirements-offline-storage.txt -asyncpg>=0.29.0 -neo4j>=5.0.0 -protobuf>=5.27.2,<6.0.0 # Required for pymilvus compatibility -pymilvus>=2.6.2 -pymongo>=4.0.0 -qdrant-client>=1.7.0 -# Storage backend dependencies -redis>=5.0.0 +# Storage backend dependencies (with version constraints matching pyproject.toml) +asyncpg>=0.29.0,<1.0.0 +neo4j>=5.0.0,<7.0.0 +pymilvus>=2.6.2,<3.0.0 +pymongo>=4.0.0,<5.0.0 +qdrant-client>=1.7.0,<2.0.0 +redis>=5.0.0,<7.0.0 diff --git a/requirements-offline.txt b/requirements-offline.txt index eb77211f..fd1f70ad 100644 --- a/requirements-offline.txt +++ b/requirements-offline.txt @@ -10,23 +10,25 @@ # LLM provider dependencies (with version constraints matching pyproject.toml) aioboto3>=12.0.0,<16.0.0 anthropic>=0.18.0,<1.0.0 +llama-index>=0.9.0,<1.0.0 +ollama>=0.1.0,<1.0.0 +openai>=1.0.0,<2.0.0 +torch>=2.0.0,<2.3.0 +transformers>=4.30.0,<5.0.0 +voyageai>=0.2.0,<1.0.0 +zhipuai>=2.0.0,<3.0.0 # Storage backend dependencies asyncpg>=0.29.0,<1.0.0 - -# Document processing dependencies -llama-index>=0.9.0,<1.0.0 neo4j>=5.0.0,<7.0.0 -ollama>=0.1.0,<1.0.0 -openai>=2.0.0,<3.0.0 -openpyxl>=3.0.0,<4.0.0 -pycryptodome>=3.0.0,<4.0.0 pymilvus>=2.6.2,<3.0.0 pymongo>=4.0.0,<5.0.0 -pypdf>=6.1.0 +qdrant-client>=1.7.0,<2.0.0 +redis>=5.0.0,<7.0.0 + +# Document processing dependencies +docling>=1.0.0,<3.0.0 +openpyxl>=3.0.0,<4.0.0 +pypdf2>=3.0.0 python-docx>=0.8.11,<2.0.0 python-pptx>=0.6.21,<2.0.0 -qdrant-client>=1.7.0,<2.0.0 -redis>=5.0.0,<8.0.0 -voyageai>=0.2.0,<1.0.0 -zhipuai>=2.0.0,<3.0.0