From c88df79246227cc66b5c8c5b037a913f8e85bee2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20thorwirth?=
Date: Wed, 19 Nov 2025 01:48:43 +0100
Subject: [PATCH] fix: correct CUDA deps implementation

- Fix uv pip install syntax to use --python flag instead of incorrect venv activation
- Add proper CPU-only PyTorch installation in main and mineru environments
- Update entrypoint scripts to check for pre-installed packages first
- Ensure proper fallback to runtime installation when needed

The previous commit only included documentation files; this commit adds
the actual implementation.
---
Review notes (below the "---" marker, so not part of the commit message):

* Dropped the trailing docker/entrypoint.sh hunk that re-added the
  "installation failed" / "return 1" / "fi" / "installed" lines after the
  existing check in ensure_mineru. It left an unmatched "fi" (bash syntax
  error) and an unconditional "return 1".
* The torch installs in the Dockerfile now pass only the PyTorch CPU wheel
  index. The NEED_MIRROR branches previously passed both --index-url and
  -i (the same option twice) plus an --extra-index-url.
* Added a docstring to pip_install_torch.
* Hunk headers and the diffstat were recomputed. The blob ids on the
  "index" lines are carried over from the original patch; the post-image
  ids are stale.

 Dockerfile           | 38 ++++++++++++++++++++++++++++++++++++++
 common/misc_utils.py | 27 ++++++++++++++++++++++++---
 docker/entrypoint.sh | 40 ++++++++++++++++++++++++++++++++--------
 3 files changed, 94 insertions(+), 11 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b16a0d7d5..3756b72a1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -148,6 +148,41 @@ RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
     fi; \
     uv sync --python 3.10 --frozen
 
+# Pre-install CPU-only PyTorch to prevent GPU version from being installed at runtime
+# This significantly reduces image size by avoiding CUDA dependencies
+# The CPU wheel index must be the only index here: -i is the short form of --index-url,
+# and an --extra-index-url would take priority over it and resolve the default build.
+RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
+    uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+
+# Pre-install optional dependencies that are normally installed at runtime
+# This prevents downloading dependencies on every container startup
+RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
+    if [ "$NEED_MIRROR" == "1" ]; then \
+        uv pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --extra-index-url https://pypi.org/simple --no-cache-dir "docling==2.58.0"; \
+    else \
+        uv pip install --no-cache-dir "docling==2.58.0"; \
+    fi
+
+# Pre-install mineru in a separate directory that can be used at runtime
+# Install CPU-only PyTorch first to avoid GPU dependencies unless explicitly needed
+# Set BUILD_MINERU=1 during build to include mineru, otherwise skip to save space
+ARG BUILD_MINERU=1
+RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
+    if [ "$BUILD_MINERU" = "1" ]; then \
+        mkdir -p /ragflow/uv_tools && \
+        uv venv /ragflow/uv_tools/.venv && \
+        uv pip install --python /ragflow/uv_tools/.venv/bin/python torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
+        if [ "$NEED_MIRROR" == "1" ]; then \
+            uv pip install --python /ragflow/uv_tools/.venv/bin/python -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple --extra-index-url https://pypi.org/simple; \
+        else \
+            uv pip install --python /ragflow/uv_tools/.venv/bin/python -U "mineru[core]"; \
+        fi; \
+    else \
+        echo "Skipping mineru installation (BUILD_MINERU=0)"; \
+        mkdir -p /ragflow/uv_tools; \
+    fi
+
 COPY web web
 COPY docs docs
 RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
@@ -171,6 +206,9 @@ ENV VIRTUAL_ENV=/ragflow/.venv
 COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
+# Copy pre-installed mineru environment
+COPY --from=builder /ragflow/uv_tools /ragflow/uv_tools
+
 ENV PYTHONPATH=/ragflow/
 
 COPY web web
diff --git a/common/misc_utils.py b/common/misc_utils.py
index ae56fe5c4..032c83943 100644
--- a/common/misc_utils.py
+++ b/common/misc_utils.py
@@ -101,8 +101,29 @@ def once(func):
 @once
 def pip_install_torch():
+    """Install PyTorch at runtime for non-CPU deployments.
+
+    Does nothing when DEVICE is "cpu" (the default). Otherwise installs the
+    default wheel when GPU_PYTORCH=true, or the CPU-only wheel if not.
+    """
     device = os.getenv("DEVICE", "cpu")
-    if device=="cpu":
+    if device == "cpu":
         return
+
     logging.info("Installing pytorch")
-    pkg_names = ["torch>=2.5.0,<3.0.0"]
-    subprocess.check_call([sys.executable, "-m", "pip", "install", *pkg_names])
+
+    # Check if GPU PyTorch is explicitly requested
+    gpu_pytorch = os.getenv("GPU_PYTORCH", "false").lower() == "true"
+
+    if gpu_pytorch:
+        # Install GPU version of PyTorch
+        logging.info("Installing GPU PyTorch (large download with CUDA dependencies)")
+        pkg_names = ["torch>=2.5.0,<3.0.0"]
+        subprocess.check_call([sys.executable, "-m", "pip", "install", *pkg_names])
+    else:
+        # Install CPU-only version to avoid CUDA dependencies
+        logging.info("Installing CPU-only PyTorch to avoid CUDA dependencies")
+        subprocess.check_call([
+            sys.executable, "-m", "pip", "install",
+            "torch>=2.5.0,<3.0.0", "torchvision",
+            "--index-url", "https://download.pytorch.org/whl/cpu"
+        ])
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index f24d5baac..391cc5618 100755
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -188,10 +188,18 @@ function start_mcp_server() {
 
 function ensure_docling() {
   [[ "${USE_DOCLING}" == "true" ]] || { echo "[docling] disabled by USE_DOCLING"; return 0; }
+
+  # Check if docling is already available in the virtual environment
+  if python3 -c "import importlib.util,sys; sys.exit(0 if importlib.util.find_spec('docling') else 1)" 2>/dev/null; then
+    echo "[docling] found in virtual environment"
+    return 0
+  fi
+
+  # Fallback to runtime installation if not found (shouldn't happen with optimized Dockerfile)
+  echo "[docling] not found, installing at runtime..."
   python3 -c 'import pip' >/dev/null 2>&1 || python3 -m ensurepip --upgrade || true
   DOCLING_PIN="${DOCLING_VERSION:-==2.58.0}"
-  python3 -c "import importlib.util,sys; sys.exit(0 if importlib.util.find_spec('docling') else 1)" \
-    || python3 -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --extra-index-url https://pypi.org/simple --no-cache-dir "docling${DOCLING_PIN}"
+  python3 -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --extra-index-url https://pypi.org/simple --no-cache-dir "docling${DOCLING_PIN}"
 }
 
 function ensure_mineru() {
@@ -203,13 +211,26 @@ function ensure_mineru() {
   local venv_dir="${default_prefix}/.venv"
   local exe="${MINERU_EXECUTABLE:-${venv_dir}/bin/mineru}"
 
+  # Check if the pre-installed mineru is available
   if [[ -x "${exe}" ]]; then
-    echo "[mineru] found: ${exe}"
+    echo "[mineru] found pre-installed: ${exe}"
     export MINERU_EXECUTABLE="${exe}"
-    return 0
+
+    # Verify it works
+    if "${MINERU_EXECUTABLE}" --help >/dev/null 2>&1; then
+      echo "[mineru] pre-installed version is working"
+      return 0
+    else
+      echo "[mineru] pre-installed version not working, will reinstall"
+    fi
   fi
 
-  echo "[mineru] not found, bootstrapping with uv ..."
+  # Check if mineru was excluded during build
+  if [[ ! -d "${venv_dir}" ]]; then
+    echo "[mineru] not included in build (BUILD_MINERU=0), installing at runtime..."
+  else
+    echo "[mineru] not found or not working, bootstrapping with uv ..."
+  fi
 
   (
     set -e
@@ -217,9 +238,12 @@ function ensure_mineru() {
     cd "${default_prefix}"
     [[ -d "${venv_dir}" ]] || uv venv "${venv_dir}"
 
-    source "${venv_dir}/bin/activate"
-    uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple --extra-index-url https://pypi.org/simple
-    deactivate
+    # Install CPU-only PyTorch first to avoid CUDA dependencies
+    echo "[mineru] installing CPU-only PyTorch to avoid CUDA packages..."
+    uv pip install --python "${venv_dir}/bin/python" torch torchvision --index-url https://download.pytorch.org/whl/cpu
+
+    # Then install mineru
+    uv pip install --python "${venv_dir}/bin/python" -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple --extra-index-url https://pypi.org/simple
   )
   export MINERU_EXECUTABLE="${exe}"
   if ! "${MINERU_EXECUTABLE}" --help >/dev/null 2>&1; then