Add offline Docker build support with embedded models and cache
- Add offline Dockerfile with tiktoken cache - Create GitHub workflow for offline builds - Update dockerignore for cleaner builds - Exclude dev dirs from package setup - Remove tiktoken volume from compose
This commit is contained in:
parent
83b10a52ad
commit
6d1ae40478
5 changed files with 172 additions and 1 deletions
|
|
@ -28,6 +28,12 @@ Makefile
|
||||||
# Exclude other projects
|
# Exclude other projects
|
||||||
/tests
|
/tests
|
||||||
/scripts
|
/scripts
|
||||||
|
/data
|
||||||
|
/dickens
|
||||||
|
/reproduce
|
||||||
|
/output_complete
|
||||||
|
/rag_storage
|
||||||
|
/inputs
|
||||||
|
|
||||||
# Python version manager file
|
# Python version manager file
|
||||||
.python-version
|
.python-version
|
||||||
|
|
|
||||||
77
.github/workflows/docker-build-offline.yml
vendored
Normal file
77
.github/workflows/docker-build-offline.yml
vendored
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
name: Build Offline Docker Image

# Manually triggered workflow: builds Dockerfile.offline for amd64/arm64 and
# pushes it to GHCR tagged "<latest git tag or sha-<short sha>>-offline".
on:
  workflow_dispatch:

permissions:
  contents: read
  packages: write

jobs:
  build-and-push-offline:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history, so that `git describe --tags` below can find tags.
          fetch-depth: 0

      - name: Get latest tag
        id: get_tag
        run: |
          # Fall back to "sha-<short sha>" when the repository has no tag.
          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
          if [ -z "$LATEST_TAG" ]; then
            LATEST_TAG="sha-$(git rev-parse --short HEAD)"
            echo "No tags found, using commit SHA: $LATEST_TAG"
          else
            echo "Latest tag found: $LATEST_TAG"
          fi
          echo "tag=$LATEST_TAG" >> "$GITHUB_OUTPUT"

      - name: Prepare offline tag
        id: offline_tag
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the tag value is never parsed as shell code.
          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
        run: |
          OFFLINE_TAG="${BASE_TAG}-offline"
          echo "Offline image tag: $OFFLINE_TAG"
          echo "offline_tag=$OFFLINE_TAG" >> "$GITHUB_OUTPUT"

      - name: Update version in __init__.py
        env:
          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
        run: |
          # Escape the characters that are special in a sed replacement
          # (backslash, "/" delimiter and "&") so tags such as "release/1.0"
          # do not break the substitution.
          ESCAPED_TAG=$(printf '%s' "$BASE_TAG" | sed -e 's|[\\/&]|\\&|g')
          sed -i "s/__version__ = \".*\"/__version__ = \"${ESCAPED_TAG}\"/" lightrag/__init__.py
          grep __version__ lightrag/__init__.py

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository }}
          tags: |
            type=raw,value=${{ steps.offline_tag.outputs.offline_tag }}

      - name: Build and push offline Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.offline
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Output image details
        env:
          IMAGE_REPO: ghcr.io/${{ github.repository }}
          OFFLINE_TAG: ${{ steps.offline_tag.outputs.offline_tag }}
          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
        run: |
          echo "Offline Docker image built and pushed successfully!"
          echo "Image tag: ${IMAGE_REPO}:${OFFLINE_TAG}"
          echo "Base Git tag used: ${BASE_TAG}"
|
||||||
87
Dockerfile.offline
Normal file
87
Dockerfile.offline
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
# Frontend build stage: compiles the web UI with Bun. The Python build stage
# later copies /app/lightrag/api/webui out of this stage.
FROM oven/bun:1 AS frontend-builder

# Copy frontend source code
COPY lightrag_webui/ /app/lightrag_webui/

# Run the install and build from inside the web UI directory
# (WORKDIR instead of `RUN cd ...`).
WORKDIR /app/lightrag_webui

# Build frontend assets for inclusion in the API package.
# --frozen-lockfile makes the install fail rather than modify the lockfile.
RUN bun install --frozen-lockfile \
    && bun run build
|
||||||
|
|
||||||
|
# Python build stage: installs LightRAG (API extras) and the offline
# requirements into /root/.local and pre-populates the tiktoken cache under
# /app/data/tiktoken. Both paths are copied into the final stage.
FROM python:3.12-slim AS builder

# Build-time only: keeps apt from prompting during package installation.
ARG DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# Make a RUN fail when any command of a pipeline fails. Without this, a failed
# rustup download would be hidden by the exit status of the `sh` it is piped to.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Upgrade packaging tools and install system deps (Rust is required by some wheels)
RUN pip install --upgrade pip setuptools wheel \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        pkg-config \
    && rm -rf /var/lib/apt/lists/* \
    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# cargo (installed by rustup) and pip --user scripts
ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}"

# Copy project metadata and sources
COPY pyproject.toml .
COPY setup.py .
COPY requirements-offline*.txt ./
COPY lightrag/ ./lightrag/

# Include pre-built frontend assets from the previous stage
COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui

# Install LightRAG with API extras and all offline dependencies.
# The requirement is quoted so the shell cannot glob-expand ".[api]".
RUN pip install --user --no-cache-dir --use-pep517 ".[api]"
RUN pip install --user --no-cache-dir -r requirements-offline.txt

# Prepare offline cache directory and pre-populate tiktoken data.
# Exit status 2 from the download step is tolerated; any other non-zero status
# fails the build.
# NOTE(review): the meaning of status 2 is not visible here - confirm against
# lightrag-download-cache.
RUN mkdir -p /app/data/tiktoken \
    && lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
    if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi
|
||||||
|
|
||||||
|
# Final stage: runtime image with the packages and tiktoken cache prepared in
# the builder stage.
FROM python:3.12-slim

WORKDIR /app

# --no-cache-dir keeps pip's download cache out of the runtime image layer.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Copy installed packages and application code
COPY --from=builder /root/.local /root/.local
COPY --from=builder /app/lightrag ./lightrag
COPY pyproject.toml .
COPY setup.py .
COPY requirements-offline*.txt ./

# Ensure the installed scripts are on PATH
ENV PATH=/root/.local/bin:$PATH

# Install the package (a regular, non-editable install) with the API extras,
# then the offline requirements, into the runtime image.
# IMPORTANT: Must be done BEFORE creating data/ directory to avoid setuptools error
RUN pip install --no-cache-dir --use-pep517 ".[api]"
RUN pip install --no-cache-dir -r requirements-offline.txt

# Create persistent data directories AFTER package installation
RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken

# Copy offline cache into the newly created directory
COPY --from=builder /app/data/tiktoken /app/data/tiktoken

# Point to the prepared cache and the data directories created above
ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken \
    WORKING_DIR=/app/data/rag_storage \
    INPUT_DIR=/app/data/inputs

# Expose API port
EXPOSE 9621

ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"]
|
||||||
|
|
@ -12,9 +12,9 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./data/rag_storage:/app/data/rag_storage
|
- ./data/rag_storage:/app/data/rag_storage
|
||||||
- ./data/inputs:/app/data/inputs
|
- ./data/inputs:/app/data/inputs
|
||||||
- ./data/tiktoken:/app/data/tiktoken
|
|
||||||
- ./config.ini:/app/config.ini
|
- ./config.ini:/app/config.ini
|
||||||
- ./.env:/app/.env
|
- ./.env:/app/.env
|
||||||
|
# - ./data/tiktoken:/app/data/tiktoken
|
||||||
env_file:
|
env_file:
|
||||||
- .env
|
- .env
|
||||||
environment:
|
environment:
|
||||||
|
|
|
||||||
|
|
@ -130,6 +130,7 @@ Repository = "https://github.com/HKUDS/LightRAG"
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
include = ["lightrag*"]
|
include = ["lightrag*"]
|
||||||
|
exclude = ["data*", "tests*", "scripts*", "examples*", "dickens*", "reproduce*", "output_complete*", "rag_storage*", "inputs*"]
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
include-package-data = true
|
include-package-data = true
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue