Change default docker image to offline version

• Add lite version docker image with tiktoken cache • Update docs and build scripts
2025-10-16 16:52:01 +08:00 · 2025-10-16 16:52:01 +08:00 · daeca17f38
commit daeca17f38
parent c61b7bd4f8
10 changed files with 71 additions and 125 deletions
--- a/.github/workflows/docker-build-offline.yml
+++ b/.github/workflows/docker-build-offline.yml
@ -1,10 +1,10 @@
-name: Build Offline Docker Image
+name: Build Lite Docker Image

 on:
  workflow_dispatch:
    inputs:
      _notes_:
-        description: '⚠️ Create offline Docker images only after non-trivial version releases.'
+        description: '⚠️ Create lite Docker images only after non-trivial version releases.'
        required: false
        type: boolean
        default: false
@ -14,7 +14,7 @@ permissions:
  packages: write

 jobs:
-  build-and-push-offline:
+  build-and-push-lite:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
@ -34,12 +34,12 @@ jobs:
          fi
          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT

-      - name: Prepare offline tag
-        id: offline_tag
+      - name: Prepare lite tag
+        id: lite_tag
        run: |
-          OFFLINE_TAG="${{ steps.get_tag.outputs.tag }}-offline"
-          echo "Offline image tag: $OFFLINE_TAG"
-          echo "offline_tag=$OFFLINE_TAG" >> $GITHUB_OUTPUT
+          LITE_TAG="${{ steps.get_tag.outputs.tag }}-lite"
+          echo "Lite image tag: $LITE_TAG"
+          echo "lite_tag=$LITE_TAG" >> $GITHUB_OUTPUT

      - name: Update version in __init__.py
        run: |
@ -62,14 +62,14 @@ jobs:
        with:
          images: ghcr.io/${{ github.repository }}
          tags: |
-            type=raw,value=${{ steps.offline_tag.outputs.offline_tag }}
-            type=raw,value=offline
+            type=raw,value=${{ steps.lite_tag.outputs.lite_tag }}
+            type=raw,value=lite

-      - name: Build and push offline Docker image
+      - name: Build and push lite Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
-          file: ./Dockerfile.offline
+          file: ./Dockerfile.lite
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
@ -79,6 +79,6 @@ jobs:

      - name: Output image details
        run: |
-          echo "Offline Docker image built and pushed successfully!"
-          echo "Image tag: ghcr.io/${{ github.repository }}:${{ steps.offline_tag.outputs.offline_tag }}"
+          echo "Lite Docker image built and pushed successfully!"
+          echo "Image tag: ghcr.io/${{ github.repository }}:${{ steps.lite_tag.outputs.lite_tag }}"
          echo "Base Git tag used: ${{ steps.get_tag.outputs.tag }}"
--- a/.github/workflows/docker-build-manual.yml
+++ b/.github/workflows/docker-build-manual.yml
@ -64,6 +64,7 @@ jobs:
        uses: docker/build-push-action@v5
        with:
          context: .
+          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@ -66,6 +66,7 @@ jobs:
        uses: docker/build-push-action@v5
        with:
          context: .
+          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
--- a/31
+++ b/31
@ -11,7 +11,7 @@ RUN cd lightrag_webui \
    && bun install --frozen-lockfile \
    && bun run build

-# Python build stage - using uv for package installation
+# Python build stage - using uv for faster package installation
 FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder

 ENV DEBIAN_FRONTEND=noninteractive
@ -20,7 +20,7 @@ ENV UV_COMPILE_BYTECODE=1

 WORKDIR /app

-# Install system dependencies required by some wheels
+# Install system deps (Rust is required by some wheels)
 RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
@ -39,8 +39,8 @@ COPY pyproject.toml .
 COPY setup.py .
 COPY uv.lock .

-# Install project dependencies (base + API extras) without the project to improve caching
-RUN uv sync --frozen --no-dev --extra api --no-install-project --no-editable
+# Install base, API, and offline extras without the project to improve caching
+RUN uv sync --frozen --no-dev --extra api --extra offline --no-install-project --no-editable

 # Copy project sources after dependency layer
 COPY lightrag/ ./lightrag/
@ -49,9 +49,15 @@ COPY lightrag/ ./lightrag/
 COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui

 # Sync project in non-editable mode and ensure pip is available for runtime installs
-RUN uv sync --frozen --no-dev --extra api --no-editable \
+RUN uv sync --frozen --no-dev --extra api --extra offline --no-editable \
    && /app/.venv/bin/python -m ensurepip --upgrade

+# Prepare offline cache directory and pre-populate tiktoken data
+# Use uv run to execute commands from the virtual environment
+RUN mkdir -p /app/data/tiktoken \
+    && uv run lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
+    if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi
+
 # Final stage
 FROM python:3.12-slim

@ -73,20 +79,23 @@ COPY uv.lock .
 # Ensure the installed scripts are on PATH
 ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH

-# Sync dependencies inside the final image using uv
+# Install dependencies with uv sync (uses locked versions from uv.lock)
 # And ensure pip is available for runtime installs
-RUN uv sync --frozen --no-dev --extra api --no-editable \
+RUN uv sync --frozen --no-dev --extra api --extra offline --no-editable \
    && /app/.venv/bin/python -m ensurepip --upgrade

-# Create persistent data directories
-RUN mkdir -p /app/data/rag_storage /app/data/inputs
+# Create persistent data directories AFTER package installation
+RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken

-# Docker data directories
+# Copy offline cache into the newly created directory
+COPY --from=builder /app/data/tiktoken /app/data/tiktoken
+
+# Point to the prepared cache
+ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken
 ENV WORKING_DIR=/app/data/rag_storage
 ENV INPUT_DIR=/app/data/inputs

 # Expose API port
 EXPOSE 9621

-# Set entrypoint
 ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"]
--- a/Dockerfile.offline
+++ b/Dockerfile.offline
@ -11,7 +11,7 @@ RUN cd lightrag_webui \
    && bun install --frozen-lockfile \
    && bun run build

-# Python build stage - using uv for faster package installation
+# Python build stage - using uv for package installation
 FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder

 ENV DEBIAN_FRONTEND=noninteractive
@ -20,7 +20,7 @@ ENV UV_COMPILE_BYTECODE=1

 WORKDIR /app

-# Install system deps (Rust is required by some wheels)
+# Install system dependencies required by some wheels
 RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
@ -39,8 +39,8 @@ COPY pyproject.toml .
 COPY setup.py .
 COPY uv.lock .

-# Install base, API, and offline extras without the project to improve caching
-RUN uv sync --frozen --no-dev --extra api --extra offline --no-install-project --no-editable
+# Install project dependencies (base + API extras) without the project to improve caching
+RUN uv sync --frozen --no-dev --extra api --no-install-project --no-editable

 # Copy project sources after dependency layer
 COPY lightrag/ ./lightrag/
@ -49,11 +49,11 @@ COPY lightrag/ ./lightrag/
 COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui

 # Sync project in non-editable mode and ensure pip is available for runtime installs
-RUN uv sync --frozen --no-dev --extra api --extra offline --no-editable \
+RUN uv sync --frozen --no-dev --extra api --no-editable \
    && /app/.venv/bin/python -m ensurepip --upgrade

-# Prepare offline cache directory and pre-populate tiktoken data
-# Use uv run to execute commands from the virtual environment
+# Prepare tiktoken cache directory and pre-populate tokenizer data
+# Ignore exit code 2 which indicates assets already cached
 RUN mkdir -p /app/data/tiktoken \
    && uv run lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
    if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi
@ -79,18 +79,18 @@ COPY uv.lock .
 # Ensure the installed scripts are on PATH
 ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH

-# Install dependencies with uv sync (uses locked versions from uv.lock)
+# Sync dependencies inside the final image using uv
 # And ensure pip is available for runtime installs
-RUN uv sync --frozen --no-dev --extra api --extra offline --no-editable \
+RUN uv sync --frozen --no-dev --extra api --no-editable \
    && /app/.venv/bin/python -m ensurepip --upgrade

-# Create persistent data directories AFTER package installation
+# Create persistent data directories
 RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken

-# Copy offline cache into the newly created directory
+# Copy cached tokenizer assets prepared in the builder stage
 COPY --from=builder /app/data/tiktoken /app/data/tiktoken

-# Point to the prepared cache
+# Docker data directories
 ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken
 ENV WORKING_DIR=/app/data/rag_storage
 ENV INPUT_DIR=/app/data/inputs
@ -98,4 +98,5 @@ ENV INPUT_DIR=/app/data/inputs
 # Expose API port
 EXPOSE 9621

+# Set entrypoint
 ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"]
--- a/docker-build-all.sh
+++ b/docker-build-all.sh
@ -3,8 +3,8 @@ set -e

 # Configuration
 IMAGE_NAME="ghcr.io/hkuds/lightrag"
-DOCKERFILE="Dockerfile.offline"
-TAG="offline"
+DOCKERFILE="Dockerfile"
+TAG="latest"

 # Get version
 VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "dev")
@ -16,13 +16,13 @@ docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --file ${DOCKERFILE} \
  --tag ${IMAGE_NAME}:${TAG} \
-  --tag ${IMAGE_NAME}:${VERSION}-${TAG} \
-  --push \
+  --tag ${IMAGE_NAME}:${VERSION} \
+  --load \
  .

 echo "✓ Build complete!"
 echo "Image pushed: ${IMAGE_NAME}:${TAG}"
-echo "Version tag: ${IMAGE_NAME}:${VERSION}-${TAG}"
+echo "Version tag: ${IMAGE_NAME}:${VERSION}"

 # Verify
 docker buildx imagetools inspect ${IMAGE_NAME}:${TAG}
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -14,11 +14,8 @@ services:
      - ./data/inputs:/app/data/inputs
      - ./config.ini:/app/config.ini
      - ./.env:/app/.env
-      # - ./data/tiktoken:/app/data/tiktoken
    env_file:
      - .env
-    environment:
-      - TIKTOKEN_CACHE_DIR=/app/data/tiktoken
    restart: unless-stopped
    extra_hosts:
      - "host.docker.internal:host-gateway"
--- a/docs/DockerDeployment.md
+++ b/docs/DockerDeployment.md
@ -82,11 +82,9 @@ docker-compose down
 docker-compose up
 ```

-### Offline docker deployment
+### Offline deployment

-LightRAG provide an docker image can be deployment in offline environments where internet access is limited or unavailable. All you need to do is modify `docker-compose.yml`: change image tag from `latest` to `offline`.
-
-> Software packages requiring `transformers`, `torch`, or `cuda` will not be included in the offline dependency group. Consequently, document extraction tools such as Docling, as well as local LLM models like Hugging Face and LMDeploy, are outside the scope of offline installation support. These high-compute-resource-demanding services should not be integrated into LightRAG. Docling will be decoupled and deployed as a standalone service.
+Software packages requiring `transformers`, `torch`, or `cuda` are not preinstalled in the Docker images. Consequently, document extraction tools such as Docling, as well as local LLM models like Hugging Face and LMDeploy, cannot be used in an offline environment. These high-compute-resource-demanding services should not be integrated into LightRAG. Docling will be decoupled and deployed as a standalone service.

 ## 📦 Build Multi-Architecture Docker Images

@ -167,12 +165,13 @@ docker buildx build \
  .
 ```

-**Offline image:**
+**Lite image:**
+
 ```bash
 docker buildx build \
  --platform linux/amd64,linux/arm64 \
-  --file Dockerfile.offline \
-  --tag ghcr.io/hkuds/lightrag:offline \
+  --file Dockerfile.lite \
+  --tag ghcr.io/hkuds/lightrag:lite \
  --load \
  .
 ```
@ -193,12 +192,12 @@ docker buildx build \
  .
 ```

-**Offline image:**
+**Lite image:**
 ```bash
 docker buildx build \
  --platform linux/amd64,linux/arm64 \
-  --file Dockerfile.offline \
-  --tag ghcr.io/hkuds/lightrag:offline \
+  --file Dockerfile.lite \
+  --tag ghcr.io/hkuds/lightrag:lite \
  --push \
  .
 ```
@ -214,10 +213,9 @@ VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v1.0.0")
 # Build with multiple tags
 docker buildx build \
  --platform linux/amd64,linux/arm64 \
-  --file Dockerfile.offline \
-  --tag ghcr.io/hkuds/lightrag:offline \
-  --tag ghcr.io/hkuds/lightrag:${VERSION}-offline \
-  --tag ghcr.io/hkuds/lightrag:offline-latest \
+  --file Dockerfile \
+  --tag ghcr.io/hkuds/lightrag:latest \
+  --tag ghcr.io/hkuds/lightrag:${VERSION} \
  --push \
  .
 ```
@ -228,7 +226,7 @@ After building, verify the multi-architecture manifest:

 ```bash
 # Inspect image manifest
-docker buildx imagetools inspect ghcr.io/hkuds/lightrag:offline
+docker buildx imagetools inspect ghcr.io/hkuds/lightrag:latest

 # Expected output shows multiple platforms:
 # Name:      ghcr.io/hkuds/lightrag:offline
@ -236,19 +234,6 @@ docker buildx imagetools inspect ghcr.io/hkuds/lightrag:offline
 # Platforms: linux/amd64, linux/arm64
 ```

-Pull and test specific architectures:
-
-```bash
-# Pull AMD64 version
-docker pull --platform linux/amd64 ghcr.io/hkuds/lightrag:offline
-
-# Pull ARM64 version
-docker pull --platform linux/arm64 ghcr.io/hkuds/lightrag:offline
-
-# Test run
-docker run --rm -p 9621:9621 ghcr.io/hkuds/lightrag:offline
-```
-
 ### 5. Troubleshooting

 #### Build Time is Very Slow
@ -323,54 +308,3 @@ docker buildx prune
 4. **Monitor resources**: Ensure sufficient disk space before building
 5. **Test both architectures**: Pull and test each platform variant
 6. **Use .dockerignore**: Exclude unnecessary files to speed up build context transfer
-
-### 9. Build Script Example
-
-For convenience, create a build script `build-and-push.sh`:
-
-```bash
-#!/bin/bash
-set -e
-
-# Configuration
-IMAGE_NAME="ghcr.io/hkuds/lightrag"
-DOCKERFILE="Dockerfile.offline"
-TAG="offline"
-
-# Get version
-VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "dev")
-
-echo "Building ${IMAGE_NAME}:${TAG} (version: ${VERSION})"
-
-# Create builder if not exists
-if ! docker buildx inspect multiarch-builder &>/dev/null; then
-    echo "Creating buildx builder..."
-    docker buildx create --name multiarch-builder --use
-    docker buildx inspect --bootstrap
-else
-    docker buildx use multiarch-builder
-fi
-
-# Build and push
-docker buildx build \
-  --platform linux/amd64,linux/arm64 \
-  --file ${DOCKERFILE} \
-  --tag ${IMAGE_NAME}:${TAG} \
-  --tag ${IMAGE_NAME}:${VERSION}-${TAG} \
-  --push \
-  .
-
-echo "✓ Build complete!"
-echo "Image pushed: ${IMAGE_NAME}:${TAG}"
-echo "Version tag: ${IMAGE_NAME}:${VERSION}-${TAG}"
-
-# Verify
-docker buildx imagetools inspect ${IMAGE_NAME}:${TAG}
-```
-
-Make it executable and run:
-
-```bash
-chmod +x build-and-push.sh
-./build-and-push.sh
-```
--- a/docs/OfflineDeployment.md
+++ b/docs/OfflineDeployment.md
@ -1,6 +1,9 @@
 # LightRAG Offline Deployment Guide

 This guide provides comprehensive instructions for deploying LightRAG in offline environments where internet access is limited or unavailable.
+
+If you deploy LightRAG using Docker, there is no need to refer to this document, as the LightRAG Docker image is pre-configured for offline operation.
+
 > Software packages requiring `transformers`, `torch`, or `cuda` will not be included in the offline dependency group. Consequently, document extraction tools such as Docling, as well as local LLM models like Hugging Face and LMDeploy, are outside the scope of offline installation support. These high-compute-resource-demanding services should not be integrated into LightRAG. Docling will be decoupled and deployed as a standalone service.

 ## Table of Contents
--- a/env.example
+++ b/env.example
@ -23,7 +23,7 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # WORKING_DIR=<absolute_path_for_working_dir>

 ### Tiktoken cache directory (Store cached files in this folder for offline deployment)
-# TIKTOKEN_CACHE_DIR=./temp/tiktoken
+# TIKTOKEN_CACHE_DIR=/app/data/tiktoken

 ### Ollama Emulating Model and Tag
 # OLLAMA_EMULATING_MODEL_NAME=lightrag