diff --git a/Dockerfile b/Dockerfile
index 0f4d6711..67aa92b6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -87,14 +87,6 @@ RUN uv sync --frozen --no-dev --extra api --extra offline --no-editable \
 # Create persistent data directories AFTER package installation
 RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken
 
-# Copy pre-built knowledge graph if available (optional)
-# This allows shipping Docker images with pre-indexed data, saving:
-# - Embedding API costs (no need to re-index)
-# - Startup time in production (instant query capability)
-# - Consistent embeddings across deployments
-# Copy will fail silently if files don't exist (handled by .dockerignore)
-COPY --chown=root:root rag_storage/graph_chunk_entity_relation.graphml /app/data/rag_storage/
-
 # Copy offline cache into the newly created directory
 COPY --from=builder /app/data/tiktoken /app/data/tiktoken
 
diff --git a/Dockerfile.prebuilt-graph b/Dockerfile.prebuilt-graph
new file mode 100644
index 00000000..9416625f
--- /dev/null
+++ b/Dockerfile.prebuilt-graph
@@ -0,0 +1,113 @@
+# Frontend build stage
+FROM oven/bun:1 AS frontend-builder
+
+WORKDIR /app
+
+# Copy frontend source code
+COPY lightrag_webui/ ./lightrag_webui/
+
+# Build frontend assets for inclusion in the API package
+RUN cd lightrag_webui \
+    && bun install --frozen-lockfile \
+    && bun run build
+
+# Python build stage - using uv for faster package installation
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV UV_SYSTEM_PYTHON=1
+ENV UV_COMPILE_BYTECODE=1
+
+WORKDIR /app
+
+# Install system deps (Rust is required by some wheels)
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    curl \
+    build-essential \
+    pkg-config \
+    && rm -rf /var/lib/apt/lists/* \
+    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}"
+
+# Ensure shared data directory exists for uv caches
+RUN mkdir -p /root/.local/share/uv
+
+# Copy project metadata and sources
+COPY pyproject.toml .
+COPY setup.py .
+COPY uv.lock .
+
+# Install base, API, and offline extras without the project to improve caching
+RUN uv sync --frozen --no-dev --extra api --extra offline --no-install-project --no-editable
+
+# Copy project sources after dependency layer
+COPY lightrag/ ./lightrag/
+
+# Include pre-built frontend assets from the previous stage
+COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui
+
+# Sync project in non-editable mode and ensure pip is available for runtime installs
+RUN uv sync --frozen --no-dev --extra api --extra offline --no-editable \
+    && /app/.venv/bin/python -m ensurepip --upgrade
+
+# Prepare offline cache directory and pre-populate tiktoken data
+# Use uv run to execute commands from the virtual environment
+RUN mkdir -p /app/data/tiktoken \
+    && uv run lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
+    if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi
+
+# Final stage
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install uv for package management.
+# Reuse the binary from the builder stage instead of pulling uv:latest, so the
+# runtime image always carries the same uv version that built /app/.venv.
+COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv
+
+ENV UV_SYSTEM_PYTHON=1
+
+# Copy installed packages and application code
+COPY --from=builder /root/.local /root/.local
+COPY --from=builder /app/.venv /app/.venv
+COPY --from=builder /app/lightrag ./lightrag
+COPY pyproject.toml .
+COPY setup.py .
+COPY uv.lock .
+
+# Ensure the installed scripts are on PATH
+ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH
+
+# Install dependencies with uv sync (uses locked versions from uv.lock)
+# And ensure pip is available for runtime installs
+RUN uv sync --frozen --no-dev --extra api --extra offline --no-editable \
+    && /app/.venv/bin/python -m ensurepip --upgrade
+
+# Create persistent data directories AFTER package installation
+RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken
+
+# Location of the pre-built NetworkX graph inside the build context.
+# Declared after the RUN layers above so that overriding it with --build-arg
+# does not invalidate the cached dependency-install layers.
+ARG GRAPH_SOURCE=rag_storage/
+
+# Copy pre-built knowledge graph assets for bundled deployments.
+# GRAPH_SOURCE must be a path within the build context. COPY copies archives
+# verbatim (it does not unpack them), so the source must already contain the
+# files expected in WORKING_DIR.
+COPY --chown=root:root ${GRAPH_SOURCE} /app/data/rag_storage/
+
+# Copy offline cache into the newly created directory
+COPY --from=builder /app/data/tiktoken /app/data/tiktoken
+
+# Point to the prepared cache
+ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken
+ENV WORKING_DIR=/app/data/rag_storage
+ENV INPUT_DIR=/app/data/inputs
+
+# Expose API port
+EXPOSE 9621
+
+ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"]
diff --git a/rag_storage/README.md b/rag_storage/README.md
index be401e74..346fedcd 100644
--- a/rag_storage/README.md
+++ b/rag_storage/README.md
@@ -1,6 +1,6 @@
 # Pre-built Knowledge Graph for Docker Deployments
 
-This directory can contain a pre-built knowledge graph that will be included in Docker images, enabling instant query capability without re-indexing.
+This directory can contain a pre-built knowledge graph that will be included in Docker images built with `Dockerfile.prebuilt-graph`, enabling instant query capability without re-indexing.
 
 ## Benefits
 
@@ -32,7 +32,7 @@ This will create `graph_chunk_entity_relation.graphml` in your local `rag_storag
 ls rag_storage/graph_chunk_entity_relation.graphml
 
 # Build Docker image (graph will be included automatically)
-docker build -t lightrag:prebuilt .
+docker build -f Dockerfile.prebuilt-graph -t lightrag:prebuilt .
 ```
 
 ### 3. Deploy Without Re-indexing
@@ -53,11 +53,20 @@ curl -X POST http://localhost:9621/query \
 
 ### Dockerfile Integration
 
-The Dockerfile includes this optional step:
+`Dockerfile.prebuilt-graph` includes this optional step:
 
 ```dockerfile
 # Copy pre-built knowledge graph if available (optional)
-COPY --chown=root:root rag_storage/graph_chunk_entity_relation.graphml /app/data/rag_storage/
+ARG GRAPH_SOURCE=rag_storage/
+COPY --chown=root:root ${GRAPH_SOURCE} /app/data/rag_storage/
+```
+
+Specify an alternate directory inside the build context when building (`COPY` does not unpack archives):
+
+```bash
+docker build -f Dockerfile.prebuilt-graph \
+  --build-arg GRAPH_SOURCE=artifacts/graphs/ \
+  -t lightrag:prebuilt .
 ```
 
 ### .dockerignore Configuration