Migrate from pip to uv package manager for faster builds

• Replace pip with uv in Dockerfile
• Remove constraints-offline.txt
• Add uv.lock for dependency pinning
• Use uv sync --frozen for builds
This commit is contained in:
yangdx 2025-10-16 01:21:03 +08:00
parent 7f223d5ada
commit 466de2070d
4 changed files with 5724 additions and 185 deletions

View file

@ -11,16 +11,17 @@ RUN cd lightrag_webui \
&& bun install --frozen-lockfile \
&& bun run build
# Python build stage
FROM python:3.12-slim AS builder
# Python build stage - using uv for faster package installation
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
ENV DEBIAN_FRONTEND=noninteractive
ENV UV_SYSTEM_PYTHON=1
ENV UV_COMPILE_BYTECODE=1
WORKDIR /app
# Upgrade packaging tools and install system deps (Rust is required by some wheels)
RUN pip install --upgrade pip setuptools wheel \
&& apt-get update \
# Install system deps (Rust is required by some wheels)
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
curl \
build-essential \
@ -30,27 +31,25 @@ RUN pip install --upgrade pip setuptools wheel \
ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}"
# Ensure shared data directory exists for uv caches
RUN mkdir -p /root/.local/share/uv
# Copy project metadata and sources
COPY pyproject.toml .
COPY setup.py .
COPY requirements-offline*.txt ./
COPY constraints-offline.txt .
COPY uv.lock .
COPY lightrag/ ./lightrag/
# Include pre-built frontend assets from the previous stage
COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui
# Install LightRAG with API extras and all offline dependencies in a single step
# This prevents version conflicts from multiple installation passes
# Use constraints file for reproducible builds with exact versions
RUN pip install --user --no-cache-dir --use-pep517 \
--upgrade-strategy=only-if-needed \
--constraint constraints-offline.txt \
.[api] -r requirements-offline.txt
# Install base and API extras so CLI helpers work during build
RUN uv sync --frozen --no-dev --extra api
# Prepare offline cache directory and pre-populate tiktoken data
# Use uv run to execute commands from the virtual environment
RUN mkdir -p /app/data/tiktoken \
&& lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
&& uv run lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi
# Final stage
@ -58,26 +57,25 @@ FROM python:3.12-slim
WORKDIR /app
RUN pip install --upgrade pip setuptools wheel
# Install uv for package management
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
ENV UV_SYSTEM_PYTHON=1
# Copy installed packages and application code
COPY --from=builder /root/.local /root/.local
COPY --from=builder /app/.venv /app/.venv
COPY --from=builder /app/lightrag ./lightrag
COPY pyproject.toml .
COPY setup.py .
COPY requirements-offline*.txt ./
COPY constraints-offline.txt .
COPY uv.lock .
# Ensure the installed scripts are on PATH
ENV PATH=/root/.local/bin:$PATH
ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH
# Install editable package for runtime (re-using cached wheels) and verify extras
# Install dependencies with uv sync (uses locked versions from uv.lock)
# IMPORTANT: Must be done BEFORE creating data/ directory to avoid setuptools error
# Use single installation to prevent version conflicts with exact version constraints
RUN pip install --no-cache-dir --use-pep517 \
--upgrade-strategy=only-if-needed \
--constraint constraints-offline.txt \
".[api]" -r requirements-offline.txt
RUN uv sync --frozen --no-dev --extra api
# Create persistent data directories AFTER package installation
RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken

View file

@ -1,160 +0,0 @@
# Exact version constraints based on successful local installation with uv pip install ".[offline]"
# Generated: 2025-10-15
# Use with: pip install --constraint constraints-offline.txt -r requirements-offline.txt
accelerate==1.10.1
# AWS/Boto packages
aioboto3==15.2.0
aiobotocore==2.24.2
# Utility packages
aioitertools==0.12.0
aiolimiter==1.2.1
aiosqlite==0.21.0
anthropic==0.69.0
antlr4-python3-runtime==4.9.3
asyncpg
beautifulsoup4==4.14.2
boto3==1.40.18
botocore==1.40.18
cachetools==6.2.1
colorama==0.4.6
colorlog==6.9.0
dataclasses-json==0.6.7
defusedxml==0.7.1
deprecated==1.2.18
dill==0.4.0
dirtyjson==1.0.8
dnspython==2.8.0
# Document processing packages
docling==2.57.0
docling-core==2.48.4
docling-ibm-models==3.9.1
docling-parse==4.5.0
docstring-parser==0.17.0
faker==37.11.0
filetype==1.2.0
fsspec==2025.9.0
greenlet==3.2.4
griffe==1.14.0
grpcio==1.75.1
h2==4.3.0
hpack==4.1.0
huggingface-hub==0.35.3
hyperframe==6.1.0
jinja2==3.1.6
jmespath==1.0.1
joblib==1.5.2
jsonlines==3.1.0
jsonpatch==1.33
jsonpointer==3.0.0
jsonref==1.1.0
jsonschema==4.25.1
jsonschema-specifications==2025.9.1
langchain-core==0.3.79
langchain-text-splitters==0.3.11
langsmith==0.4.35
latex2mathml==3.78.1
llama-cloud==0.1.35
llama-cloud-services==0.6.54
# LlamaIndex packages
llama-index==0.14.4
llama-index-cli==0.5.3
llama-index-core==0.14.4
llama-index-embeddings-openai==0.5.1
llama-index-indices-managed-llama-cloud==0.9.4
llama-index-instrumentation==0.4.2
llama-index-llms-openai==0.6.4
llama-index-readers-file==0.5.4
llama-index-readers-llama-parse==0.5.1
llama-index-workflows==2.8.0
llama-parse==0.6.54
lxml==5.4.0
markdown-it-py==4.0.0
marko==2.2.1
markupsafe==3.0.3
marshmallow==3.26.1
mdurl==0.1.2
mpire==2.10.2
mpmath==1.3.0
multiprocess==0.70.18
mypy-extensions==1.1.1
neo4j==6.0.2
nest-asyncio==1.6.0
# NLP packages
nltk==3.9.2
numpy
ollama==0.6.0
omegaconf==2.3.0
# LLM Provider packages
openai==1.109.1
# Computer vision
opencv-python==4.11.0.86
opencv-python-headless==4.11.0.86
openpyxl==3.1.5
orjson==3.11.3
# Data processing
pandas==2.2.3
pillow==11.3.0
pluggy==1.6.0
polyfactory==2.22.2
portalocker==3.2.0
protobuf==6.32.1
pyclipper==1.3.0.post6
pydantic-settings==2.11.0
pygments==2.19.2
# Authentication
pyjwt==2.8.0
pylatexenc==2.10
pymilvus==2.6.2
pymongo==4.15.3
pypdf==6.1.1
pypdfium2==4.30.0
python-docx==1.2.0
python-pptx==1.0.2
qdrant-client==1.15.1
rapidocr==3.4.2
# Storage backends
redis==6.4.0
referencing==0.37.0
requests-toolbelt==1.0.0
rich==14.2.0
rpds-py==0.27.1
rtree==1.4.1
s3transfer==0.13.1
safetensors==0.6.2
scipy==1.16.2
semchunk==2.2.2
shapely==2.1.2
shellingham==1.5.4
soupsieve==2.8
sqlalchemy==2.0.44
striprtf==0.0.26
sympy==1.14.0
tabulate==0.9.0
tokenizers==0.22.1
# Core ML/AI packages
torch==2.2.2
torchvision==0.17.2
transformers==4.57.1
typer==0.19.2
typing-inspect==0.9.0
ujson==5.11.0
voyageai==0.3.5
wrapt==1.17.3
zhipuai==2.1.5.20250825
zstandard==0.25.0
# Special packages (platform-specific)
# Note: These may not be available on all platforms
# ocrmac==1.0.0 # macOS only
# pyobjc-* packages are macOS only

160
docs/UV_LOCK_GUIDE.md Normal file
View file

@ -0,0 +1,160 @@
# uv.lock Update Guide
## What is uv.lock?
`uv.lock` is uv's lock file. It captures the exact version of every dependency, including transitive ones, much like:
- Node.js `package-lock.json`
- Rust `Cargo.lock`
- Python Poetry `poetry.lock`
Keeping `uv.lock` in version control guarantees that everyone installs the same dependency set.
## When does uv.lock change?
### Situations where it does *not* change automatically
- Running `uv sync --frozen`
- Building Docker images that call `uv sync --frozen`
- Editing source code without touching dependency metadata
### Situations where it will change
1. **`uv lock` or `uv lock --upgrade`**
```bash
uv lock # Resolve according to current constraints
uv lock --upgrade # Re-resolve and upgrade to the newest compatible releases
```
Use these commands after modifying `pyproject.toml`, when you want fresh dependency versions, or if the lock file was deleted or corrupted.
2. **`uv add`**
```bash
uv add requests # Adds the dependency and updates both files
uv add --dev pytest # Adds a dev dependency
```
`uv add` edits `pyproject.toml` and refreshes `uv.lock` in one step.
3. **`uv remove`**
```bash
uv remove requests
```
This removes the dependency from `pyproject.toml` and rewrites `uv.lock`.
4. **`uv sync` without `--frozen`**
```bash
uv sync
```
Normally this only installs what is already locked. However, if `pyproject.toml` and `uv.lock` disagree or the lock file is missing, uv will regenerate and update `uv.lock`. In CI and production builds you should prefer `uv sync --frozen` to prevent unintended updates.
## Example workflows
### Scenario 1: Add a new dependency
```bash
# Recommended: let uv handle both files
uv add fastapi
git add pyproject.toml uv.lock
git commit -m "Add fastapi dependency"
# Manual alternative
# 1. Edit pyproject.toml
# 2. Regenerate the lock file
uv lock
git add pyproject.toml uv.lock
git commit -m "Add fastapi dependency"
```
### Scenario 2: Relax or tighten a version constraint
```bash
# 1. Edit the requirement in pyproject.toml,
# e.g. openai>=1.0.0,<2.0.0 -> openai>=1.5.0,<2.0.0
# 2. Re-resolve the lock file
uv lock
# 3. Commit both files
git add pyproject.toml uv.lock
git commit -m "Update openai to >=1.5.0"
```
### Scenario 3: Upgrade everything to the newest compatible versions
```bash
uv lock --upgrade
git diff uv.lock
git add uv.lock
git commit -m "Upgrade dependencies to latest compatible versions"
```
### Scenario 4: Teammate syncing the project
```bash
git pull # Fetch latest code and lock file
uv sync --frozen # Install exactly what uv.lock specifies
```
## Using uv.lock in Docker
```dockerfile
RUN uv sync --frozen --no-dev --extra api
```
`--frozen` keeps builds reproducible: uv installs exactly the versions recorded in `uv.lock` and never re-resolves dependencies or rewrites the lock file during the build.
`--extra api` installs the optional `api` dependency group, which the API server requires.
## Frequently asked questions
- **`uv.lock` is almost 1MB. Does that matter?**
No. uv reads the file only when locking or syncing dependencies; its size has no effect on the application at runtime.
- **Should we commit `uv.lock`?**
Yes. Commit it so collaborators and CI jobs share the same dependency graph.
- **Deleted the lock file by accident?**
Run `uv lock` to regenerate it from `pyproject.toml`.
- **Can `uv.lock` and `requirements.txt` coexist?**
They can, but maintaining both is redundant. Prefer relying on `uv.lock` alone whenever possible.
- **How do I inspect locked versions?**
```bash
uv tree
grep -A5 'name = "openai"' uv.lock
```
## Best practices
### Recommended
1. Commit `uv.lock` alongside `pyproject.toml`.
2. Use `uv sync --frozen` in CI, Docker, and other reproducible environments.
3. Use plain `uv sync` during local development if you want uv to reconcile the lock for you.
4. Run `uv lock --upgrade` periodically to pick up the latest compatible releases.
5. Regenerate the lock file immediately after changing dependency constraints.
### Avoid
1. Running `uv sync` without `--frozen` in CI or production pipelines.
2. Editing `uv.lock` by hand—uv will overwrite manual edits.
3. Ignoring lock file diffs in code reviews—unexpected dependency changes can break builds.
## Summary
| Command | Updates `uv.lock` | Typical use |
|-----------------------|-------------------|-------------------------------------------|
| `uv lock` | ✅ Yes | After editing constraints |
| `uv lock --upgrade` | ✅ Yes | Upgrade to the newest compatible versions |
| `uv add <pkg>` | ✅ Yes | Add a dependency |
| `uv remove <pkg>` | ✅ Yes | Remove a dependency |
| `uv sync` | ⚠️ Maybe | Local development; can regenerate the lock |
| `uv sync --frozen` | ❌ No | CI/CD, Docker, reproducible builds |
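Remember: `uv.lock` only changes when you run a uv command that re-resolves dependencies (`uv lock`, `uv add`, `uv remove`, or plain `uv sync` when the lock is missing or out of date). Keep it in sync with `pyproject.toml` and commit it whenever it changes.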

5541
uv.lock generated Normal file

File diff suppressed because it is too large Load diff