- Add reportlab>=4.4.1 to dependencies (pyproject.toml) - Add fonts-freefont-ttf and fonts-noto-cjk to Dockerfile for Unicode support - Implement automatic CJK font detection and switching: - Latin content uses user-selected font (Helvetica, Times-Roman, Courier) - CJK/Arabic/Hebrew/Thai/Hindi content auto-switches to STSong-Light CID font - Add documentation about automatic font switching behavior - Fix frontend null safety in use-selelct-filters.ts - Add DocsGenerator component to agent constants Fixes missing glyphs (black squares) issue for Chinese, Japanese, Korean, Arabic, Hebrew, Thai, and Hindi characters in generated PDFs.
204 lines
8.1 KiB
Docker
204 lines
8.1 KiB
Docker
# base stage: OS packages, toolchains and bundled resources shared by the
# builder and production stages.
FROM ubuntu:24.04 AS base
USER root

# Run shell-form RUN instructions under bash with pipefail, so that a failure on
# the left-hand side of a pipeline (e.g. `tar ... | tar ...`, `curl ... | bash`)
# fails the build instead of being masked by the last command's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Build with --build-arg NEED_MIRROR=1 to switch apt, uv/PyPI and rustup
# downloads to mirror servers.
ARG NEED_MIRROR=0

WORKDIR /ragflow

# Copy the models downloaded via download_deps.py out of the deps image.
# The archive is streamed from one tar into another; hidden entries ('.*') are
# skipped and the three leading path components are stripped on extraction.
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    tar -c --exclude='.*' -f - \
        /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
        /huggingface.co/InfiniFlow/deepdoc \
    | tar -x --strip-components=3 -C /ragflow/rag/res/deepdoc -f -

# Bundle offline resources from the deps image: NLTK data, the Tika server jar
# (with its .md5 file) and the cl100k_base tiktoken file.
#
# python-tika (https://github.com/chrismattmann/tika-python): by default it
# checks the Tika version and pulls the latest from Apache every time. Pointing
# TIKA_SERVER_JAR at the local jar is the only way to run it without internet
# access.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    cp -r /deps/nltk_data /root/ \
    && cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ \
    && cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4

ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar" \
    DEBIAN_FRONTEND=noninteractive

# Setup apt
# Python packages and the system packages they implicitly depend on:
#   opencv-python:         libglib2.0-0 libglx-mesa0 libgl1
#   aspose-slides:         pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb
#   python-pptx:           default-jdk tika-server-standard-3.0.0.jar
#   selenium:              libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
#   Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
# fonts-freefont-ttf and fonts-noto-cjk are installed for Unicode/CJK text.
#
# ca-certificates is installed first because the mirror URLs are https; that
# first step is best-effort (terminated with ';'). docker-clean is removed and
# Keep-Downloaded-Packages enabled so the /var/cache/apt cache mount is reused.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    apt update \
    && apt --no-install-recommends install -y ca-certificates; \
    if [ "${NEED_MIRROR}" = "1" ]; then \
        sed -i \
            -e 's|http://archive.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' \
            -e 's|http://security.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' \
            /etc/apt/sources.list.d/ubuntu.sources; \
    fi; \
    rm -f /etc/apt/apt.conf.d/docker-clean \
    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \
    && chmod 1777 /tmp \
    && apt update \
    && apt install -y libglib2.0-0 libglx-mesa0 libgl1 \
    && apt install -y pkg-config libicu-dev libgdiplus \
    && apt install -y default-jdk \
    && apt install -y libatk-bridge2.0-0 \
    && apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev \
    && apt install -y libjemalloc-dev \
    && apt install -y nginx unzip curl wget git vim less \
    && apt install -y ghostscript \
    && apt install -y pandoc \
    && apt install -y texlive \
    && apt install -y fonts-freefont-ttf fonts-noto-cjk

# Install uv from the deps image and pre-install the CPython version that the
# builder stage requests with `uv sync --python 3.12`, so the interpreter
# installed here is the one the virtual environment is created from.
# With NEED_MIRROR=1, /etc/uv/uv.toml points uv at mirrors for Python builds
# and for the default package index.
# NOTE(review): the uv tarball name is hard-coded to x86_64 while other steps in
# this file branch on aarch64 -- confirm whether an arm64 archive is expected.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    if [ "$NEED_MIRROR" == "1" ]; then \
        mkdir -p /etc/uv && \
        echo 'python-install-mirror = "https://registry.npmmirror.com/-/binary/python-build-standalone/"' > /etc/uv/uv.toml && \
        echo '[[index]]' >> /etc/uv/uv.toml && \
        echo 'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' >> /etc/uv/uv.toml && \
        echo 'default = true' >> /etc/uv/uv.toml; \
    fi; \
    tar xzf /deps/uv-x86_64-unknown-linux-gnu.tar.gz \
    && cp uv-x86_64-unknown-linux-gnu/* /usr/local/bin/ \
    && rm -rf uv-x86_64-unknown-linux-gnu \
    && uv python install 3.12

ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
ENV PATH=/root/.local/bin:$PATH

# Install Node.js 20.x from NodeSource in place of the distribution packages.
# The distribution's nodejs was considered too old (the original note cites
# nodejs 12.22 on Ubuntu 22.04; this image is based on ubuntu:24.04).
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt purge -y nodejs npm cargo \
    && apt autoremove -y \
    && apt update \
    && apt install -y nodejs

# A modern version of cargo is needed for the latest version of the Rust
# compiler, so the toolchain is installed through rustup (minimal profile).
# With NEED_MIRROR=1 the TUNA mirrors are used for rustup/rust dist files.
# curl is forced to HTTP/1.1 when fetching the rustup installer.
RUN apt update && apt install -y curl build-essential \
    && if [ "${NEED_MIRROR}" = "1" ]; then \
        export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \
        export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \
        echo "Using TUNA mirrors for Rustup."; \
    fi; \
    curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs \
        | bash -s -- -y --profile minimal \
    && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc

ENV PATH="/root/.cargo/bin:${PATH}"

# Sanity check: both tools must be on PATH.
RUN cargo --version && rustc --version

# Add the mssql ODBC driver from the Microsoft package repository:
#   arm64 / aarch64 (e.g. a macOS ARM64 host) -> msodbcsql18
#   general x86_64 environment and any others -> msodbcsql17
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
    && curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list \
    && apt update \
    && arch="$(uname -m)"; \
    case "$arch" in \
        arm64|aarch64) ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18 ;; \
        *)             ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17 ;; \
    esac || { echo "Failed to install ODBC driver"; exit 1; }

# Add dependencies of selenium: Chrome and chromedriver, unpacked from the
# 121-0-6167-85 archives shipped in the deps image.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
    unzip /chrome-linux64.zip \
    && mv chrome-linux64 /opt/chrome \
    && ln -s /opt/chrome/chrome /usr/local/bin/
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
    unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver \
    && mv chromedriver /usr/local/bin/ \
    && rm -f /usr/bin/google-chrome

# Install the legacy libssl1.1 package matching the CPU architecture.
# Background: https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    case "$(uname -m)" in \
        x86_64)  dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb ;; \
        aarch64) dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb ;; \
    esac

# builder stage
FROM base AS builder
USER root
WORKDIR /ragflow

# install dependencies from uv.lock file
COPY pyproject.toml uv.lock ./

# uv records the index url into uv.lock but doesn't failover among multiple
# indexes (https://github.com/astral-sh/uv/issues/10462), so the index host in
# the lock file is rewritten to match NEED_MIRROR before syncing.
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
    if [ "${NEED_MIRROR}" = "1" ]; then \
        from_host='pypi.org'; to_host='pypi.tuna.tsinghua.edu.cn'; \
    else \
        from_host='pypi.tuna.tsinghua.edu.cn'; to_host='pypi.org'; \
    fi; \
    sed -i "s|${from_host}|${to_host}|g" uv.lock; \
    uv sync --python 3.12 --frozen

# Build the web front end; the npm cache is kept in a build cache mount.
COPY web web
COPY docs docs
WORKDIR /ragflow/web
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
    npm install && npm run build
WORKDIR /ragflow

# Derive the version string from git (nearest v* tag on the first-parent
# history, falling back to the commit id) and store it in /ragflow/VERSION.
COPY .git /ragflow/.git
RUN version_info="$(git describe --tags --match=v* --first-parent --always)"; \
    echo "RAGFlow version: $version_info"; \
    echo "$version_info" > /ragflow/VERSION

# production stage
FROM base AS production
USER root
WORKDIR /ragflow

# Copy Python environment and packages from the builder stage and put the
# virtual environment's bin directory first on PATH.
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" \
    PYTHONPATH=/ragflow/

# Application sources (one directory per COPY so each keeps its own path).
COPY admin admin
COPY agent agent
COPY agentic_reasoning agentic_reasoning
COPY api api
COPY common common
COPY conf conf
COPY deepdoc deepdoc
COPY graphrag graphrag
COPY mcp mcp
COPY plugin plugin
COPY rag rag
COPY web web
COPY pyproject.toml uv.lock ./

# Service configuration template and entrypoint script.
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
COPY docker/entrypoint.sh ./
RUN chmod +x ./entrypoint*.sh

# Copy compiled web pages and the VERSION file produced in the builder stage.
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
COPY --from=builder /ragflow/VERSION /ragflow/VERSION

ENTRYPOINT ["./entrypoint.sh"]