From 1afb70f563552d650c5f9baed2baff24f2aad975 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20MANSUY?= Date: Thu, 4 Dec 2025 19:19:00 +0800 Subject: [PATCH] cherry-pick e0966b65 --- Dockerfile | 40 +++-- Dockerfile.lite | 14 +- docs/DockerDeployment.md | 331 ++++++--------------------------------- 3 files changed, 83 insertions(+), 302 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7a61270a..aaa3c84b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,5 @@ +# syntax=docker/dockerfile:1 + # Frontend build stage FROM oven/bun:1 AS frontend-builder @@ -7,11 +9,12 @@ WORKDIR /app COPY lightrag_webui/ ./lightrag_webui/ # Build frontend assets for inclusion in the API package -RUN cd lightrag_webui \ +RUN --mount=type=cache,target=/root/.bun/install/cache \ + cd lightrag_webui \ && bun install --frozen-lockfile \ && bun run build -# Python build stage - using uv for package installation +# Python build stage - using uv for faster package installation FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder ENV DEBIAN_FRONTEND=noninteractive @@ -20,7 +23,7 @@ ENV UV_COMPILE_BYTECODE=1 WORKDIR /app -# Install system dependencies required by some wheels +# Install system deps (Rust is required by some wheels) RUN apt-get update \ && apt-get install -y --no-install-recommends \ curl \ @@ -39,8 +42,9 @@ COPY pyproject.toml . COPY setup.py . COPY uv.lock . 
-# Install project dependencies (base + API extras) without the project to improve caching -RUN uv sync --frozen --no-dev --extra api --no-install-project --no-editable +# Install base, API, and offline extras without the project to improve caching; the cache mount targets uv's default cache dir (~/.cache/uv) so downloads persist across builds +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --frozen --no-dev --extra api --extra offline --no-install-project --no-editable # Copy project sources after dependency layer COPY lightrag/ ./lightrag/ @@ -49,9 +53,16 @@ COPY lightrag/ ./lightrag/ COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui # Sync project in non-editable mode and ensure pip is available for runtime installs -RUN uv sync --frozen --no-dev --extra api --no-editable \ +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --frozen --no-dev --extra api --extra offline --no-editable \ && /app/.venv/bin/python -m ensurepip --upgrade +# Prepare offline cache directory and pre-populate tiktoken data +# Use uv run to execute commands from the virtual environment +RUN mkdir -p /app/data/tiktoken \ + && uv run lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \ + if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi + # Final stage FROM python:3.12-slim @@ -73,19 +84,24 @@ COPY uv.lock . 
# Ensure the installed scripts are on PATH ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH -# Sync dependencies inside the final image using uv -RUN uv sync --frozen --no-dev --extra api --no-editable \ +# Install dependencies with uv sync (uses locked versions from uv.lock); uv's download cache (~/.cache/uv) lives in a BuildKit cache mount, not in the image +# And ensure pip is available for runtime installs +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --frozen --no-dev --extra api --extra offline --no-editable \ && /app/.venv/bin/python -m ensurepip --upgrade -# Create persistent data directories -RUN mkdir -p /app/data/rag_storage /app/data/inputs +# Create persistent data directories AFTER package installation +RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken -# Docker data directories +# Copy offline cache into the newly created directory +COPY --from=builder /app/data/tiktoken /app/data/tiktoken + +# Point to the prepared cache +ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken ENV WORKING_DIR=/app/data/rag_storage ENV INPUT_DIR=/app/data/inputs # Expose API port EXPOSE 9621 -# Set entrypoint ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"] diff --git a/Dockerfile.lite b/Dockerfile.lite index 25ec8fe5..3f488886 100644 --- a/Dockerfile.lite +++ b/Dockerfile.lite @@ -1,3 +1,5 @@ +# syntax=docker/dockerfile:1 + # Frontend build stage FROM oven/bun:1 AS frontend-builder @@ -7,7 +9,8 @@ WORKDIR /app COPY lightrag_webui/ ./lightrag_webui/ # Build frontend assets for inclusion in the API package -RUN cd lightrag_webui \ +RUN --mount=type=cache,target=/root/.bun/install/cache \ + cd lightrag_webui \ && bun install --frozen-lockfile \ && bun run build @@ -40,7 +43,8 @@ COPY setup.py . COPY uv.lock . 
# Install project dependencies (base + API extras) without the project to improve caching -RUN uv sync --frozen --no-dev --extra api --no-install-project --no-editable +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --frozen --no-dev --extra api --no-install-project --no-editable # Copy project sources after dependency layer COPY lightrag/ ./lightrag/ @@ -49,7 +53,8 @@ COPY lightrag/ ./lightrag/ COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui # Sync project in non-editable mode and ensure pip is available for runtime installs -RUN uv sync --frozen --no-dev --extra api --no-editable \ +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --frozen --no-dev --extra api --no-editable \ && /app/.venv/bin/python -m ensurepip --upgrade # Prepare tiktoken cache directory and pre-populate tokenizer data @@ -81,7 +86,8 @@ ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH # Sync dependencies inside the final image using uv # And ensure pip is available for runtime installs -RUN uv sync --frozen --no-dev --extra api --no-editable \ +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --frozen --no-dev --extra api --no-editable \ && /app/.venv/bin/python -m ensurepip --upgrade # Create persistent data directories diff --git a/docs/DockerDeployment.md b/docs/DockerDeployment.md index d5a7a3dc..575359b4 100644 --- a/docs/DockerDeployment.md +++ b/docs/DockerDeployment.md @@ -59,10 +59,19 @@ LightRAG can be configured using environment variables in the `.env` file: Docker instructions work the same on all platforms with Docker Desktop installed. 
+### Build Optimization + +The Dockerfile uses BuildKit cache mounts to significantly improve build performance: + +- **Automatic cache management**: BuildKit is automatically enabled via the `# syntax=docker/dockerfile:1` directive +- **Faster rebuilds**: Only downloads changed dependencies when `uv.lock` or `bun.lock` files are modified +- **Efficient package caching**: UV and Bun package downloads are cached across builds +- **No manual configuration needed**: Works out of the box in Docker Compose and GitHub Actions + ### Start LightRAG server: ```bash -docker-compose up -d +docker compose up -d ``` LightRAG Server uses the following paths for data storage: @@ -77,300 +86,50 @@ data/ To update the Docker container: ```bash -docker-compose pull -docker-compose down -docker-compose up +docker compose pull +docker compose down +docker compose up ``` -### Offline docker deployment +### Offline deployment -LightRAG provide an docker image can be deployment in offline environments where internet access is limited or unavailable. All you need to do is modify `docker-compose.yml`: change image tag from `latest` to `offline`. +Software packages requiring `transformers`, `torch`, or `cuda` are not preinstalled in the Docker images. Consequently, document extraction tools such as Docling, as well as local LLM models like Hugging Face and LMDeploy, cannot be used in an offline environment. These high-compute-resource-demanding services should not be integrated into LightRAG. Docling will be decoupled and deployed as a standalone service. -> Software packages requiring `transformers`, `torch`, or `cuda` will not be included in the offline dependency group. Consequently, document extraction tools such as Docling, as well as local LLM models like Hugging Face and LMDeploy, are outside the scope of offline installation support. These high-compute-resource-demanding services should not be integrated into LightRAG. Docling will be decoupled and deployed as a standalone service. 
+## 📦 Build Docker Images -## 📦 Build Multi-Architecture Docker Images +### For local development and testing -### Prerequisites +```bash +# Build and run with Docker Compose (BuildKit automatically enabled) +docker compose up --build + +# Or explicitly enable BuildKit if needed +DOCKER_BUILDKIT=1 docker compose up --build +``` + +**Note**: BuildKit is automatically enabled by the `# syntax=docker/dockerfile:1` directive in the Dockerfile, ensuring optimal caching performance. + +### For production release + + **multi-architecture build and push**: + +```bash +# Use the provided build script +./docker-build-push.sh +``` + +**The build script will**: + +- Check Docker registry login status +- Create/use buildx builder automatically +- Build for both AMD64 and ARM64 architectures +- Push to GitHub Container Registry (ghcr.io) +- Verify the multi-architecture manifest + +**Prerequisites**: Before building multi-architecture images, ensure you have: - Docker 20.10+ with Buildx support - Sufficient disk space (20GB+ recommended for offline image) - Registry access credentials (if pushing images) - -### 1. Setup Buildx Builder - -Create and configure a multi-architecture builder: - -```bash -# Create a new buildx builder instance -docker buildx create --name multiarch-builder --use - -# Start and verify the builder -docker buildx inspect --bootstrap - -# Verify supported platforms -docker buildx inspect multiarch-builder -``` - -You should see support for `linux/amd64` and `linux/arm64` in the output. - -### 2. Registry Authentication - -#### For GitHub Container Registry (ghcr.io) - -**Option 1: Using Personal Access Token** - -1. Create a GitHub Personal Access Token: - - Go to GitHub Settings → Developer settings → Personal access tokens → Tokens (classic) - - Generate new token with `write:packages` and `read:packages` permissions - - Copy the token - -2. 
Login to registry: - ```bash - echo "YOUR_GITHUB_TOKEN" | docker login ghcr.io -u YOUR_GITHUB_USERNAME --password-stdin - ``` - -**Option 2: Using GitHub CLI** - -```bash -gh auth token | docker login ghcr.io -u YOUR_GITHUB_USERNAME --password-stdin -``` - -#### For Docker Hub - -```bash -docker login -# Enter your Docker Hub username and password -``` - -#### For Other Registries - -```bash -docker login your-registry.example.com -# Enter your credentials -``` - -### 3. Build Commands - -#### A. Local Build (No Push) - -Build multi-architecture images locally without pushing to registry: - -**Normal image:** -```bash -docker buildx build \ - --platform linux/amd64,linux/arm64 \ - --file Dockerfile \ - --tag ghcr.io/hkuds/lightrag:latest \ - --load \ - . -``` - -**Offline image:** -```bash -docker buildx build \ - --platform linux/amd64,linux/arm64 \ - --file Dockerfile.offline \ - --tag ghcr.io/hkuds/lightrag:offline \ - --load \ - . -``` - -> **Note**: `--load` loads the image to local Docker, but only supports single platform. For multi-platform, use `--push` instead. - -#### B. Build and Push to Registry - -Build and directly push to container registry: - -**Normal image:** -```bash -docker buildx build \ - --platform linux/amd64,linux/arm64 \ - --file Dockerfile \ - --tag ghcr.io/hkuds/lightrag:latest \ - --push \ - . -``` - -**Offline image:** -```bash -docker buildx build \ - --platform linux/amd64,linux/arm64 \ - --file Dockerfile.offline \ - --tag ghcr.io/hkuds/lightrag:offline \ - --push \ - . -``` - -#### C. Build with Multiple Tags - -Add version tags alongside latest: - -```bash -# Get version from git tag -VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v1.0.0") - -# Build with multiple tags -docker buildx build \ - --platform linux/amd64,linux/arm64 \ - --file Dockerfile.offline \ - --tag ghcr.io/hkuds/lightrag:offline \ - --tag ghcr.io/hkuds/lightrag:${VERSION}-offline \ - --tag ghcr.io/hkuds/lightrag:offline-latest \ - --push \ - . 
-``` - -### 4. Verify Built Images - -After building, verify the multi-architecture manifest: - -```bash -# Inspect image manifest -docker buildx imagetools inspect ghcr.io/hkuds/lightrag:offline - -# Expected output shows multiple platforms: -# Name: ghcr.io/hkuds/lightrag:offline -# MediaType: application/vnd.docker.distribution.manifest.list.v2+json -# Platforms: linux/amd64, linux/arm64 -``` - -Pull and test specific architectures: - -```bash -# Pull AMD64 version -docker pull --platform linux/amd64 ghcr.io/hkuds/lightrag:offline - -# Pull ARM64 version -docker pull --platform linux/arm64 ghcr.io/hkuds/lightrag:offline - -# Test run -docker run --rm -p 9621:9621 ghcr.io/hkuds/lightrag:offline -``` - -### 5. Troubleshooting - -#### Build Time is Very Slow - -**Cause**: Building ARM64 on AMD64 (or vice versa) requires QEMU emulation, which is slower. - -**Solutions**: -- Use remote cache (`--cache-from/--cache-to`) for faster subsequent builds -- Build on native architecture when possible -- Be patient - initial multi-arch builds take 30-60 minutes - -#### "No space left on device" Error - -**Cause**: Insufficient disk space for build layers and cache. - -**Solutions**: -```bash -# Clean up Docker system -docker system prune -a - -# Clean up buildx cache -docker buildx prune - -# Check disk space -df -h -``` - -#### "failed to solve: failed to push" Error - -**Cause**: Not logged into the registry or insufficient permissions. - -**Solutions**: -1. Verify you're logged in: `docker login ghcr.io` -2. Check you have push permissions to the repository -3. Verify the image name matches your repository path - -#### Builder Not Found - -**Cause**: Buildx builder not created or not set as current. - -**Solutions**: -```bash -# List builders -docker buildx ls - -# Create and use new builder -docker buildx create --name multiarch-builder --use - -# Or switch to existing builder -docker buildx use multiarch-builder -``` - -### 6. 
Cleanup - -Remove builder when done: - -```bash -# Switch back to default builder -docker buildx use default - -# Remove multiarch builder -docker buildx rm multiarch-builder - -# Prune build cache -docker buildx prune -``` - -### 7. Best Practices - -1. **Use specific tags**: Avoid only using `latest`, include version tags -2. **Verify platforms**: Always check the manifest after pushing -4. **Monitor resources**: Ensure sufficient disk space before building -5. **Test both architectures**: Pull and test each platform variant -6. **Use .dockerignore**: Exclude unnecessary files to speed up build context transfer - -### 9. Build Script Example - -For convenience, create a build script `build-and-push.sh`: - -```bash -#!/bin/bash -set -e - -# Configuration -IMAGE_NAME="ghcr.io/hkuds/lightrag" -DOCKERFILE="Dockerfile.offline" -TAG="offline" - -# Get version -VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "dev") - -echo "Building ${IMAGE_NAME}:${TAG} (version: ${VERSION})" - -# Create builder if not exists -if ! docker buildx inspect multiarch-builder &>/dev/null; then - echo "Creating buildx builder..." - docker buildx create --name multiarch-builder --use - docker buildx inspect --bootstrap -else - docker buildx use multiarch-builder -fi - -# Build and push -docker buildx build \ - --platform linux/amd64,linux/arm64 \ - --file ${DOCKERFILE} \ - --tag ${IMAGE_NAME}:${TAG} \ - --tag ${IMAGE_NAME}:${VERSION}-${TAG} \ - --push \ - . - -echo "✓ Build complete!" -echo "Image pushed: ${IMAGE_NAME}:${TAG}" -echo "Version tag: ${IMAGE_NAME}:${VERSION}-${TAG}" - -# Verify -docker buildx imagetools inspect ${IMAGE_NAME}:${TAG} -``` - -Make it executable and run: - -```bash -chmod +x build-and-push.sh -./build-and-push.sh -```