- Fix uv pip install syntax to use the --python flag instead of incorrect venv activation.
- Add proper CPU-only PyTorch installation in the main and mineru environments.
- Update entrypoint scripts to check for pre-installed packages first.
- Ensure proper fallback to runtime installation when needed.

The previous commit only included documentation files; this commit adds the actual implementation.
239 lines
10 KiB
Docker
239 lines
10 KiB
Docker
# base stage
# Common foundation that the builder and production stages both start from.
FROM ubuntu:22.04 AS base
USER root

# Run every RUN instruction under bash with pipefail enabled, so that a failure
# on the left-hand side of a pipe (e.g. `curl ... | bash`, `tar ... | tar ...`)
# fails the build instead of being hidden by the last command's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Build with --build-arg NEED_MIRROR=1 to use the mirror servers selected by
# the `NEED_MIRROR` checks in the RUN steps of this file.
ARG NEED_MIRROR=0

WORKDIR /ragflow
|
|
|
|
# Copy models downloaded via download_deps.py
# The models live in the infiniflow/ragflow_deps image, which is bind-mounted
# for the duration of the RUN step only.
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    src=/huggingface.co/InfiniFlow && \
    cp "$src/huqie/huqie.txt.trie" /ragflow/rag/res/ && \
    tar --exclude='.*' -cf - "$src/text_concat_xgb_v1.0" "$src/deepdoc" \
        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
|
|
|
|
# https://github.com/chrismattmann/tika-python
# python-tika can only run without internet access when TIKA_SERVER_JAR points
# at a local jar; otherwise it checks the tika version and pulls the latest
# release from Apache on every start.
# The tiktoken file is stored under a hash-like name; presumably this is the
# cache key tiktoken looks up at runtime (TODO confirm).
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    tika_jar=tika-server-standard-3.0.0.jar && \
    cp -r /deps/nltk_data /root/ && \
    cp "/deps/${tika_jar}" "/deps/${tika_jar}.md5" /ragflow/ && \
    cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4

ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar" \
    DEBIAN_FRONTEND=noninteractive
|
|
|
|
# Setup apt
# Python package and implicit dependencies:
# opencv-python: libglib2.0-0 libglx-mesa0 libgl1
# aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb
# python-pptx: default-jdk tika-server-standard-3.0.0.jar
# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
#
# Steps:
#   1. When NEED_MIRROR=1, rewrite the Ubuntu archive/ports hosts to the TUNA mirror.
#   2. Remove docker-clean and keep downloaded packages so the /var/cache/apt
#      cache mount is actually populated and reused across builds.
#   3. Install ca-certificates first, refresh the package lists, then install
#      everything else in a single transaction.
# `apt-get` is used instead of `apt` because the `apt` command warns that its
# CLI is not stable for use in scripts.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        sed -i 's|http://ports.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
        sed -i 's|http://archive.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
    fi; \
    rm -f /etc/apt/apt.conf.d/docker-clean && \
    echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
    chmod 1777 /tmp && \
    apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates && \
    apt-get update && \
    apt-get install -y \
        libglib2.0-0 libglx-mesa0 libgl1 \
        pkg-config libicu-dev libgdiplus \
        default-jdk \
        libatk-bridge2.0-0 \
        libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev \
        libjemalloc-dev \
        python3-pip pipx nginx unzip curl wget git vim less \
        ghostscript
|
|
|
|
# Install uv through pipx. When NEED_MIRROR=1, pip and uv are first pointed at
# the TUNA PyPI mirror (pip via `pip3 config`, uv via /etc/uv/uv.toml).
RUN if [ "$NEED_MIRROR" == "1" ]; then \
        pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
        pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn; \
        mkdir -p /etc/uv && \
        printf '%s\n' \
            '[[index]]' \
            'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' \
            'default = true' \
            > /etc/uv/uv.toml; \
    fi; \
    pipx install uv

ENV PYTHONDONTWRITEBYTECODE=1 \
    DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
# /root/.local/bin is prepended so executables installed there (such as the
# pipx-installed uv) are found on PATH.
ENV PATH=/root/.local/bin:$PATH
|
|
|
|
# nodejs 12.22 on Ubuntu 22.04 is too old
# Register the NodeSource 20.x repository, purge any distro nodejs/npm/cargo
# packages, then install nodejs from the refreshed package lists.
# pipefail makes a failed download of the setup script abort the step instead
# of letting bash run on empty input; `apt-get` replaces `apt`, whose CLI is
# not stable for scripted use.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    set -o pipefail && \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get purge -y nodejs npm cargo && \
    apt-get autoremove -y && \
    apt-get update && \
    apt-get install -y nodejs
|
|
|
|
# A modern version of cargo is needed for the latest version of the Rust compiler.
# Installs the minimal rustup profile. When NEED_MIRROR=1 the TUNA mirrors are
# exported for rustup's dist files before the installer runs.
# `set -eo pipefail` makes every step fatal: previously the `fi;` separator let
# the installer run even if the apt commands had failed, and a failed curl
# download was hidden behind the exit status of `bash`.
# curl is forced to HTTP/1.1 for the installer download.
RUN set -eo pipefail; \
    apt-get update; \
    apt-get install -y curl build-essential; \
    if [ "$NEED_MIRROR" == "1" ]; then \
        export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \
        export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \
        echo "Using TUNA mirrors for Rustup."; \
    fi; \
    curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal; \
    echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc

ENV PATH="/root/.cargo/bin:${PATH}"

# Fail the build early if the toolchain is not usable.
RUN cargo --version && rustc --version
|
|
|
|
# Add the MS SQL ODBC driver
# arm64 / aarch64 (e.g. built on Apple Silicon hosts): install msodbcsql18.
# x86_64 and any other architecture: install msodbcsql17.
# `set -eo pipefail` plus `curl -f` make a failed key or repository-list
# download abort the step; previously the `;` after the architecture lookup
# broke the `&&` chain, so the install was attempted even after earlier failures.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    set -eo pipefail; \
    curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | apt-key add -; \
    curl -fsSL https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list; \
    apt-get update; \
    case "$(uname -m)" in \
        arm64|aarch64) odbc_driver=msodbcsql18 ;; \
        *)             odbc_driver=msodbcsql17 ;; \
    esac; \
    ACCEPT_EULA=Y apt-get install -y unixodbc-dev "$odbc_driver" || \
        { echo "Failed to install ODBC driver"; exit 1; }
|
|
|
|
|
|
|
|
# Add dependencies of selenium
# Chrome: unpack the archive from the deps image into the working directory,
# move it to /opt/chrome and expose the binary on PATH through a symlink.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
    unzip /chrome-linux64.zip \
    && mv chrome-linux64 /opt/chrome \
    && ln -s /opt/chrome/chrome /usr/local/bin/

# Chromedriver: extract only the driver binary (-j drops the archive's
# directory prefix), install it into /usr/local/bin and remove any
# /usr/bin/google-chrome entry.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
    unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver \
    && mv chromedriver /usr/local/bin/ \
    && rm -f /usr/bin/google-chrome
|
|
|
|
# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
# Install the libssl1.1 package matching the build architecture from the deps
# image; any other architecture is left untouched.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    case "$(uname -m)" in \
        x86_64)  dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb ;; \
        aarch64) dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb ;; \
    esac
|
|
|
|
|
|
# builder stage
FROM base AS builder
USER root

WORKDIR /ragflow

# install dependencies from uv.lock file
COPY pyproject.toml uv.lock ./

# https://github.com/astral-sh/uv/issues/10462
# uv records index url into uv.lock but doesn't failover among multiple indexes,
# so the host recorded in uv.lock is rewritten to match NEED_MIRROR before syncing.
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        from_host='pypi.org'; to_host='pypi.tuna.tsinghua.edu.cn'; \
    else \
        from_host='pypi.tuna.tsinghua.edu.cn'; to_host='pypi.org'; \
    fi; \
    sed -i "s|${from_host}|${to_host}|g" uv.lock; \
    uv sync --python 3.10 --frozen
|
|
|
|
# Pre-install CPU-only PyTorch to prevent GPU version from being installed at runtime
# This significantly reduces image size by avoiding CUDA dependencies
#
# The mirror branch used to pass both `--index-url` and `-i`, which are the
# same option: given twice it is either rejected or leaves only one of the two
# indexes in effect, so the CPU wheel index and the mirror could not both apply.
# uv gives `--extra-index-url` entries priority over `--index-url` (earlier
# extras first), so the order below is: PyTorch CPU wheels, then the TUNA
# mirror, then pypi.org as the last fallback.
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        uv pip install torch torchvision \
            --index-url https://pypi.org/simple \
            --extra-index-url https://download.pytorch.org/whl/cpu \
            --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple; \
    else \
        uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; \
    fi
|
|
|
|
# Pre-install optional dependencies that are normally installed at runtime
# This prevents downloading dependencies on every container startup
#
# `--no-cache-dir` was dropped: it bypassed the uv cache that this step mounts,
# making the cache mount pointless.
# In the mirror branch the TUNA mirror is now the `--extra-index-url`, because
# uv gives extra indexes priority over `--index-url`; pypi.org remains the
# fallback instead of being consulted first.
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        uv pip install "docling==2.58.0" \
            --index-url https://pypi.org/simple \
            --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple; \
    else \
        uv pip install "docling==2.58.0"; \
    fi
|
|
|
|
# Pre-install mineru in a separate directory that can be used at runtime
# Install CPU-only PyTorch first to avoid GPU dependencies unless explicitly needed
# Set BUILD_MINERU=1 during build to include mineru, otherwise skip to save space
#
# Fixes relative to the previous version of this step:
#   * The mirror branch passed `--index-url` and `-i` (the same option) in one
#     command, so the CPU wheel index and the mirror could not both apply. The
#     indexes are now ordered with `--extra-index-url`, which uv prioritises
#     over `--index-url` (earlier extras first).
#   * `-U` was removed from the mineru install. The venv is freshly created, so
#     there is nothing to upgrade; an upgrade pass may instead re-resolve the
#     torch packages installed just before (NOTE(review): confirm against uv's
#     --upgrade semantics).
# /ragflow/uv_tools is always created, even when mineru is skipped, so the
# directory exists in both cases.
ARG BUILD_MINERU=1
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
    if [ "$BUILD_MINERU" = "1" ]; then \
        mineru_python=/ragflow/uv_tools/.venv/bin/python && \
        mkdir -p /ragflow/uv_tools && \
        uv venv /ragflow/uv_tools/.venv && \
        if [ "$NEED_MIRROR" == "1" ]; then \
            uv pip install --python "$mineru_python" torch torchvision \
                --index-url https://pypi.org/simple \
                --extra-index-url https://download.pytorch.org/whl/cpu \
                --extra-index-url https://mirrors.aliyun.com/pypi/simple && \
            uv pip install --python "$mineru_python" "mineru[core]" \
                --index-url https://pypi.org/simple \
                --extra-index-url https://mirrors.aliyun.com/pypi/simple; \
        else \
            uv pip install --python "$mineru_python" torch torchvision \
                --index-url https://download.pytorch.org/whl/cpu && \
            uv pip install --python "$mineru_python" "mineru[core]"; \
        fi; \
    else \
        echo "Skipping mineru installation (BUILD_MINERU=0)"; \
        mkdir -p /ragflow/uv_tools; \
    fi
|
|
|
|
# Build the web front end. docs is copied alongside web; presumably the web
# build reads from it (TODO confirm).
COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
    cd web && npm install && npm run build

# Record the version derived from git tags (falling back to the abbreviated
# commit via --always) in /ragflow/VERSION.
# The `v*` pattern and the variable expansion are quoted so the shell passes
# them through verbatim (no globbing or word splitting); the former no-op
# self-assignment of version_info was removed.
# The steps stay `;`-separated on purpose: as before, a failing `git describe`
# yields an empty VERSION file rather than a failed build.
COPY .git /ragflow/.git

RUN version_info=$(git describe --tags --match='v*' --first-parent --always); \
    echo "RAGFlow version: $version_info"; \
    echo "$version_info" > /ragflow/VERSION
|
|
|
|
# production stage
# Runtime image: starts from base and receives only the artifacts produced in
# the builder stage plus the application sources.
# NOTE(review): this stage runs as root and declares no HEALTHCHECK.
FROM base AS production
USER root

WORKDIR /ragflow

# Python virtual environment built in the builder stage; its bin directory is
# put first on PATH.
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Pre-installed mineru environment (may be an empty directory when the builder
# was run with BUILD_MINERU=0).
COPY --from=builder /ragflow/uv_tools /ragflow/uv_tools

ENV PYTHONPATH=/ragflow/

# Application sources and project metadata.
COPY admin admin
COPY agent agent
COPY agentic_reasoning agentic_reasoning
COPY api api
COPY common common
COPY conf conf
COPY deepdoc deepdoc
COPY graphrag graphrag
COPY mcp mcp
COPY plugin plugin
COPY rag rag
COPY web web
COPY pyproject.toml uv.lock ./

# Service configuration template and container entrypoint.
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
COPY docker/entrypoint.sh ./
RUN chmod +x ./entrypoint*.sh

# Compiled web pages and the version file produced by the builder stage.
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
COPY --from=builder /ragflow/VERSION /ragflow/VERSION

ENTRYPOINT ["./entrypoint.sh"]
|