From c88df79246227cc66b5c8c5b037a913f8e85bee2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20thorwirth?= <moonstruxx@googlemail.com>
Date: Wed, 19 Nov 2025 01:48:43 +0100
Subject: [PATCH] fix: correct CUDA deps implementation

- Fix uv pip install syntax to use --python flag instead of incorrect venv activation
- Add proper CPU-only PyTorch installation in main and mineru environments
- Update entrypoint scripts to check for pre-installed packages first
- Ensure proper fallback to runtime installation when needed

The previous commit only included documentation files; this commit adds the actual implementation.
---
 Dockerfile           | 41 +++++++++++++++++++++++++++++++++++++++++
 common/misc_utils.py | 22 +++++++++++++++++++---
 docker/entrypoint.sh | 44 ++++++++++++++++++++++++++++++++++++--------
 3 files changed, 96 insertions(+), 11 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b16a0d7d5..3756b72a1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -148,6 +148,44 @@ RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
     fi; \
     uv sync --python 3.10 --frozen
 
+# Pre-install CPU-only PyTorch so the default GPU build (with its CUDA dependencies) is not pulled in later.
+# With a mirror, the CPU wheel index is passed as --extra-index-url, which uv searches before --index-url.
+RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
+    if [ "$NEED_MIRROR" == "1" ]; then \
+        uv pip install torch torchvision --index-url https://pypi.tuna.tsinghua.edu.cn/simple --extra-index-url https://download.pytorch.org/whl/cpu; \
+    else \
+        uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; \
+    fi
+
+# Pre-install docling 2.58.0 at build time so it does not have to be downloaded on every container startup.
+# NOTE(review): --no-cache-dir appears to bypass the uv cache mounted by this RUN; confirm which is intended.
+RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
+    if [ "$NEED_MIRROR" == "1" ]; then \
+        uv pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --extra-index-url https://pypi.org/simple --no-cache-dir "docling==2.58.0"; \
+    else \
+        uv pip install --no-cache-dir "docling==2.58.0"; \
+    fi
+
+# Pre-install mineru into its own venv (/ragflow/uv_tools/.venv) so it can be used at runtime.
+# CPU-only PyTorch goes in first; mineru is then installed without -U so the installed torch is not upgraded.
+# mineru is included by default; build with BUILD_MINERU=0 to skip it and save space.
+ARG BUILD_MINERU=1
+RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
+    if [ "$BUILD_MINERU" = "1" ]; then \
+        mkdir -p /ragflow/uv_tools && \
+        uv venv /ragflow/uv_tools/.venv && \
+        if [ "$NEED_MIRROR" == "1" ]; then \
+            uv pip install --python /ragflow/uv_tools/.venv/bin/python torch torchvision --index-url https://mirrors.aliyun.com/pypi/simple --extra-index-url https://download.pytorch.org/whl/cpu && \
+            uv pip install --python /ragflow/uv_tools/.venv/bin/python "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple --extra-index-url https://pypi.org/simple; \
+        else \
+            uv pip install --python /ragflow/uv_tools/.venv/bin/python torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
+            uv pip install --python /ragflow/uv_tools/.venv/bin/python "mineru[core]"; \
+        fi; \
+    else \
+        echo "Skipping mineru installation (BUILD_MINERU=0)"; \
+        mkdir -p /ragflow/uv_tools; \
+    fi
+
 COPY web web
 COPY docs docs
 RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
@@ -171,6 +209,9 @@ ENV VIRTUAL_ENV=/ragflow/.venv
 COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
+# Copy /ragflow/uv_tools from the builder stage (holds the mineru venv when BUILD_MINERU=1, else an empty dir)
+COPY --from=builder /ragflow/uv_tools /ragflow/uv_tools
+
 ENV PYTHONPATH=/ragflow/
 
 COPY web web
diff --git a/common/misc_utils.py b/common/misc_utils.py
index ae56fe5c4..032c83943 100644
--- a/common/misc_utils.py
+++ b/common/misc_utils.py
@@ -101,8 +101,37 @@ def once(func):
 @once
 def pip_install_torch():
+    """Install PyTorch with pip when ``DEVICE`` is set to something other than "cpu".
+
+    Returns immediately when the ``DEVICE`` environment variable is unset or
+    equal to "cpu". Otherwise installs ``torch>=2.5.0,<3.0.0``: from the default
+    index when ``GPU_PYTORCH`` is "true" (case-insensitive), else together with
+    torchvision from the PyTorch CPU wheel index.
+
+    NOTE(review): a non-cpu DEVICE without GPU_PYTORCH=true now gets the CPU-only
+    build, whereas it previously got the default build; confirm this is intended.
+
+    Raises:
+        subprocess.CalledProcessError: If pip exits with a non-zero status.
+    """
     device = os.getenv("DEVICE", "cpu")
-    if device=="cpu":
+    if device == "cpu":
         return
+
     logging.info("Installing pytorch")
-    pkg_names = ["torch>=2.5.0,<3.0.0"]
-    subprocess.check_call([sys.executable, "-m", "pip", "install", *pkg_names])
+
+    # The GPU build is opt-in: it is only installed when explicitly requested.
+    gpu_pytorch = os.getenv("GPU_PYTORCH", "false").lower() == "true"
+
+    if gpu_pytorch:
+        # Default index: the build that ships with the CUDA dependencies.
+        logging.info("Installing GPU PyTorch (large download with CUDA dependencies)")
+        pkg_names = ["torch>=2.5.0,<3.0.0"]
+        subprocess.check_call([sys.executable, "-m", "pip", "install", *pkg_names])
+    else:
+        # CPU wheel index: same version range, without the CUDA dependencies.
+        logging.info("Installing CPU-only PyTorch to avoid CUDA dependencies")
+        subprocess.check_call([
+            sys.executable, "-m", "pip", "install",
+            "torch>=2.5.0,<3.0.0", "torchvision",
+            "--index-url", "https://download.pytorch.org/whl/cpu",
+        ])
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index f24d5baac..391cc5618 100755
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -188,10 +188,18 @@ function start_mcp_server() {
 
 function ensure_docling() {
     [[ "${USE_DOCLING}" == "true" ]] || { echo "[docling] disabled by USE_DOCLING"; return 0; }
+
+    # Skip installation when docling is already importable (any version; DOCLING_VERSION is not checked here)
+    if python3 -c "import importlib.util,sys; sys.exit(0 if importlib.util.find_spec('docling') else 1)" 2>/dev/null; then
+        echo "[docling] found in virtual environment"
+        return 0
+    fi
+
+    # Fallback to runtime installation if not found (shouldn't happen with optimized Dockerfile)
+    echo "[docling] not found, installing at runtime..."
     python3 -c 'import pip' >/dev/null 2>&1 || python3 -m ensurepip --upgrade || true
     DOCLING_PIN="${DOCLING_VERSION:-==2.58.0}"
-    python3 -c "import importlib.util,sys; sys.exit(0 if importlib.util.find_spec('docling') else 1)" \
-      || python3 -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --extra-index-url https://pypi.org/simple --no-cache-dir "docling${DOCLING_PIN}"
+    python3 -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --extra-index-url https://pypi.org/simple --no-cache-dir "docling${DOCLING_PIN}"
 }
 
 function ensure_mineru() {
@@ -203,13 +211,26 @@ function ensure_mineru() {
     local venv_dir="${default_prefix}/.venv"
     local exe="${MINERU_EXECUTABLE:-${venv_dir}/bin/mineru}"
 
+    # Check if the pre-installed mineru is available
     if [[ -x "${exe}" ]]; then
-      echo "[mineru] found: ${exe}"
+      echo "[mineru] found pre-installed: ${exe}"
       export MINERU_EXECUTABLE="${exe}"
-      return 0
+
+      # An executable file is not enough: only skip the reinstall if it actually runs
+      if "${MINERU_EXECUTABLE}" --help >/dev/null 2>&1; then
+        echo "[mineru] pre-installed version is working"
+        return 0
+      else
+        echo "[mineru] pre-installed version not working, will reinstall"
+      fi
     fi
 
-    echo "[mineru] not found, bootstrapping with uv ..."
+    # A missing venv directory means mineru was excluded during build
+    if [[ ! -d "${venv_dir}" ]]; then
+        echo "[mineru] not included in build (BUILD_MINERU=0), installing at runtime..."
+    else
+        echo "[mineru] not found or not working, bootstrapping with uv ..."
+    fi
 
     (
         set -e
@@ -217,9 +238,12 @@ function ensure_mineru() {
         cd "${default_prefix}"
         [[ -d "${venv_dir}" ]] || uv venv "${venv_dir}"
 
-        source "${venv_dir}/bin/activate"
-        uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple --extra-index-url https://pypi.org/simple
-        deactivate
+        # Install CPU-only PyTorch first to avoid CUDA dependencies
+        echo "[mineru] installing CPU-only PyTorch to avoid CUDA packages..."
+        uv pip install --python "${venv_dir}/bin/python" torch torchvision --index-url https://download.pytorch.org/whl/cpu
+
+        # Then install mineru. NOTE(review): -U may let uv replace the CPU-only torch installed above; verify.
+        uv pip install --python "${venv_dir}/bin/python" -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple --extra-index-url https://pypi.org/simple
     )
     export MINERU_EXECUTABLE="${exe}"
     if ! "${MINERU_EXECUTABLE}" --help >/dev/null 2>&1; then