Merge branch 'feat/ragas-evaluation' of https://github.com/anouar-bm/LightRAG into feat/ragas-evaluation

anouarbm 2025-11-03 13:30:16 +01:00
commit debfa0ec96
6 changed files with 63 additions and 18 deletions

View file

@@ -1,3 +1,5 @@
+# syntax=docker/dockerfile:1
+
 # Frontend build stage
 FROM oven/bun:1 AS frontend-builder
@@ -7,7 +9,8 @@ WORKDIR /app
 COPY lightrag_webui/ ./lightrag_webui/
 # Build frontend assets for inclusion in the API package
-RUN cd lightrag_webui \
+RUN --mount=type=cache,target=/root/.bun/install/cache \
+    cd lightrag_webui \
     && bun install --frozen-lockfile \
     && bun run build
@@ -40,7 +43,8 @@ COPY setup.py .
 COPY uv.lock .
 # Install base, API, and offline extras without the project to improve caching
-RUN uv sync --frozen --no-dev --extra api --extra offline --no-install-project --no-editable
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --extra offline --no-install-project --no-editable
 # Copy project sources after dependency layer
 COPY lightrag/ ./lightrag/
@@ -49,7 +53,8 @@ COPY lightrag/ ./lightrag/
 COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui
 # Sync project in non-editable mode and ensure pip is available for runtime installs
-RUN uv sync --frozen --no-dev --extra api --extra offline --no-editable \
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --extra offline --no-editable \
     && /app/.venv/bin/python -m ensurepip --upgrade
 # Prepare offline cache directory and pre-populate tiktoken data
@@ -81,7 +86,8 @@ ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH
 # Install dependencies with uv sync (uses locked versions from uv.lock)
 # And ensure pip is available for runtime installs
-RUN uv sync --frozen --no-dev --extra api --extra offline --no-editable \
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --extra offline --no-editable \
     && /app/.venv/bin/python -m ensurepip --upgrade
 # Create persistent data directories AFTER package installation

View file

@@ -1,3 +1,5 @@
+# syntax=docker/dockerfile:1
+
 # Frontend build stage
 FROM oven/bun:1 AS frontend-builder
@@ -7,7 +9,8 @@ WORKDIR /app
 COPY lightrag_webui/ ./lightrag_webui/
 # Build frontend assets for inclusion in the API package
-RUN cd lightrag_webui \
+RUN --mount=type=cache,target=/root/.bun/install/cache \
+    cd lightrag_webui \
     && bun install --frozen-lockfile \
     && bun run build
@@ -40,7 +43,8 @@ COPY setup.py .
 COPY uv.lock .
 # Install project dependencies (base + API extras) without the project to improve caching
-RUN uv sync --frozen --no-dev --extra api --no-install-project --no-editable
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --no-install-project --no-editable
 # Copy project sources after dependency layer
 COPY lightrag/ ./lightrag/
@@ -49,7 +53,8 @@ COPY lightrag/ ./lightrag/
 COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui
 # Sync project in non-editable mode and ensure pip is available for runtime installs
-RUN uv sync --frozen --no-dev --extra api --no-editable \
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --no-editable \
     && /app/.venv/bin/python -m ensurepip --upgrade
 # Prepare tiktoken cache directory and pre-populate tokenizer data
@@ -81,7 +86,8 @@ ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH
 # Sync dependencies inside the final image using uv
 # And ensure pip is available for runtime installs
-RUN uv sync --frozen --no-dev --extra api --no-editable \
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --no-editable \
     && /app/.venv/bin/python -m ensurepip --upgrade
 # Create persistent data directories

View file

@@ -59,10 +59,19 @@ LightRAG can be configured using environment variables in the `.env` file:
 Docker instructions work the same on all platforms with Docker Desktop installed.
 
+### Build Optimization
+
+The Dockerfile uses BuildKit cache mounts to significantly improve build performance:
+
+- **Automatic cache management**: BuildKit is enabled automatically via the `# syntax=docker/dockerfile:1` directive
+- **Faster rebuilds**: dependencies are re-downloaded only when `uv.lock` or `bun.lock` changes
+- **Efficient package caching**: uv and Bun package downloads are cached across builds
+- **No manual configuration needed**: works out of the box with Docker Compose and GitHub Actions
+
 ### Start LightRAG server:
 
 ```bash
-docker-compose up -d
+docker compose up -d
 ```
 
 LightRAG Server uses the following paths for data storage:
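The cache-mount behavior described above can be observed with an ordinary double build; this is a sketch, not from the docs (the image tag is a placeholder):

```bash
# First build populates the uv and Bun cache mounts (placeholder tag)
docker build -t lightrag:dev .

# Edit a source file (not the lockfiles) and rebuild: the dependency
# steps reuse the cached downloads instead of re-fetching packages
docker build -t lightrag:dev --progress=plain .
```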
@@ -77,9 +86,9 @@ data/
 To update the Docker container:
 
 ```bash
-docker-compose pull
-docker-compose down
-docker-compose up
+docker compose pull
+docker compose down
+docker compose up
 ```
 
 ### Offline deployment
@@ -91,10 +100,15 @@ Software packages requiring `transformers`, `torch`, or `cuda` are not preinstalled
 
 ### For local development and testing
 
 ```bash
-# Build and run with docker-compose
+# Build and run with Docker Compose (BuildKit automatically enabled)
 docker compose up --build
+
+# Or explicitly enable BuildKit if needed
+DOCKER_BUILDKIT=1 docker compose up --build
 ```
 
+**Note**: BuildKit is enabled automatically by the `# syntax=docker/dockerfile:1` directive in the Dockerfile, ensuring optimal caching performance.
+
 ### For production release
 
 **multi-architecture build and push**:
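The hunk ends before the command itself; purely as an illustration (image name and tag are placeholders, not the project's), a typical multi-architecture invocation looks like:

```bash
# Hypothetical release build: compile for amd64 and arm64, then push
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  -t yourrepo/lightrag:latest \
  --push .
```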

View file

@@ -166,10 +166,11 @@ MAX_PARALLEL_INSERT=2
 ### Num of chunks sent to Embedding in single request
 # EMBEDDING_BATCH_NUM=10
 
-###########################################################
+###########################################################################
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
-###########################################################
+### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
+###########################################################################
 
 ### LLM request timeout setting for all llm (0 means no timeout for Ollama)
 # LLM_TIMEOUT=180
@@ -224,10 +225,11 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock Specific Parameters
 # BEDROCK_LLM_TEMPERATURE=1.0
 
-####################################################################################
+#######################################################################################
 ### Embedding Configuration (Should not be changed after the first file is processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
-####################################################################################
+### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
+#######################################################################################
 
 # EMBEDDING_TIMEOUT=30
 EMBEDDING_BINDING=ollama
 EMBEDDING_MODEL=bge-m3:latest
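For illustration only, the new `*_BINDING_HOST` lines pair with the binding selectors roughly like this (values are hypothetical; Ollama's default port is shown):

```bash
# Hypothetical .env excerpt: host-style values for Ollama bindings
LLM_BINDING=ollama
LLM_BINDING_HOST=http://localhost:11434
EMBEDDING_BINDING=ollama
EMBEDDING_BINDING_HOST=http://localhost:11434
```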

View file

@@ -11,7 +11,6 @@ if not pm.is_installed("openai"):
     pm.install("openai")
 
 from openai import (
-    AsyncOpenAI,
     APIConnectionError,
     RateLimitError,
     APITimeoutError,
@@ -27,6 +26,19 @@ from lightrag.utils import (
     safe_unicode_decode,
     logger,
 )
 
+# Try to import Langfuse for LLM observability (optional)
+# Falls back to standard OpenAI client if not available
+try:
+    from langfuse.openai import AsyncOpenAI
+
+    LANGFUSE_ENABLED = True
+    logger.info("Langfuse observability enabled for OpenAI client")
+except ImportError:
+    from openai import AsyncOpenAI
+
+    LANGFUSE_ENABLED = False
+    logger.debug("Langfuse not available, using standard OpenAI client")
+
 from lightrag.types import GPTKeywordExtractionFormat
 from lightrag.api import __api_version__
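Since `langfuse.openai.AsyncOpenAI` is a drop-in wrapper around the OpenAI client, call sites need no changes; tracing is configured through Langfuse's standard environment variables. A minimal sketch (the key values are placeholders):

```bash
# Placeholder credentials: the Langfuse SDK reads these at client creation
export LANGFUSE_PUBLIC_KEY=pk-lf-...
export LANGFUSE_SECRET_KEY=sk-lf-...
export LANGFUSE_HOST=https://cloud.langfuse.com

lightrag-server
```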

View file

@@ -122,6 +122,11 @@ evaluation = [
     "pytest-asyncio>=1.2.0",
 ]
 
+observability = [
+    # LLM observability and tracing dependencies
+    "langfuse>=3.8.1",
+]
+
 [project.scripts]
 lightrag-server = "lightrag.api.lightrag_server:main"
 lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
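Assuming the uv-based workflow used elsewhere in this commit, the new optional group would be pulled in with an extra flag; a sketch:

```bash
# Install the optional observability group alongside the API extras
uv sync --frozen --extra api --extra observability
```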