Merge branch 'main' into fix-main

Commit 101c83ad36: 77 changed files with 2225 additions and 793 deletions
.DS_Store (BIN, vendored)
Binary file not shown.
@@ -37,6 +37,9 @@ AWS_SECRET_ACCESS_KEY=
 # OPTIONAL url for openrag link to langflow in the UI
 LANGFLOW_PUBLIC_URL=
 
+# OPTIONAL: Override host for docling service (for special networking setups)
+# HOST_DOCKER_INTERNAL=host.containers.internal
+
 # Langflow auth
 LANGFLOW_AUTO_LOGIN=False
 LANGFLOW_SUPERUSER=
59  .github/workflows/build-langflow-responses.yml (vendored, deleted)
@@ -1,59 +0,0 @@
-name: Build Langflow Responses Multi-Arch
-
-on:
-  workflow_dispatch:
-
-jobs:
-  build:
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - platform: linux/amd64
-            arch: amd64
-            runs-on: ubuntu-latest
-          - platform: linux/arm64
-            arch: arm64
-            runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2]
-
-    runs-on: ${{ matrix.runs-on }}
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
-
-      - name: Build and push langflow (${{ matrix.arch }})
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./Dockerfile.langflow
-          platforms: ${{ matrix.platform }}
-          push: true
-          tags: phact/langflow:responses-${{ matrix.arch }}
-          cache-from: type=gha,scope=langflow-responses-${{ matrix.arch }}
-          cache-to: type=gha,mode=max,scope=langflow-responses-${{ matrix.arch }}
-
-  manifest:
-    needs: build
-    runs-on: ubuntu-latest
-    steps:
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
-
-      - name: Create and push multi-arch manifest
-        run: |
-          docker buildx imagetools create -t phact/langflow:responses \
-            phact/langflow:responses-amd64 \
-            phact/langflow:responses-arm64
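The manifest step this deleted workflow performed can be reproduced by hand; a minimal sketch, assuming the per-arch tags have already been pushed:

```bash
# Combine the per-architecture images into one multi-arch tag,
# then verify which platforms the resulting manifest lists.
docker buildx imagetools create -t phact/langflow:responses \
  phact/langflow:responses-amd64 \
  phact/langflow:responses-arm64
docker buildx imagetools inspect phact/langflow:responses
```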
103  .github/workflows/build-multiarch.yml (vendored)
@@ -1,16 +1,95 @@
-name: Build Multi-Architecture Docker Images
+name: Release + Docker Images (multi-arch)
 
 on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'pyproject.toml'
   workflow_dispatch:
-    inputs:
-      update_latest:
-        description: 'Update latest tags (production release)'
-        required: false
-        default: false
-        type: boolean
 
 jobs:
+  build-python-packages:
+    runs-on: ubuntu-latest
+    outputs:
+      skip_release: ${{ steps.version.outputs.skip_release }}
+      version: ${{ steps.version.outputs.version }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+
+      - name: Extract version from pyproject.toml
+        id: version
+        run: |
+          VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+          echo "Version: $VERSION"
+
+          # Check if tag already exists
+          if git rev-parse "v$VERSION" >/dev/null 2>&1; then
+            echo "Tag v$VERSION already exists, skipping release"
+            echo "skip_release=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+          echo "skip_release=false" >> $GITHUB_OUTPUT
+
+          # Check if version is numeric (e.g., 0.1.16) vs prerelease (e.g., 0.1.16-rc1)
+          if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
+            echo "is_prerelease=false" >> $GITHUB_OUTPUT
+            echo "Release type: Production"
+          else
+            echo "is_prerelease=true" >> $GITHUB_OUTPUT
+            echo "Release type: Prerelease"
+          fi
+
+      - name: Build wheel and source distribution
+        if: steps.version.outputs.skip_release != 'true'
+        run: |
+          uv build
+
+      - name: List built artifacts
+        if: steps.version.outputs.skip_release != 'true'
+        run: |
+          ls -la dist/
+          echo "Built artifacts:"
+          for file in dist/*; do
+            echo "  - $(basename $file) ($(stat -c%s $file | numfmt --to=iec-i)B)"
+          done
+
+      - name: Upload build artifacts
+        if: steps.version.outputs.skip_release != 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-packages
+          path: dist/
+          retention-days: 30
+
+      - name: Create Release
+        if: steps.version.outputs.skip_release != 'true'
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: v${{ steps.version.outputs.version }}
+          name: Release ${{ steps.version.outputs.version }}
+          draft: false
+          prerelease: ${{ steps.version.outputs.is_prerelease }}
+          generate_release_notes: true
+          files: |
+            dist/*.whl
+            dist/*.tar.gz
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
   build:
+    needs: build-python-packages
+    if: needs.build-python-packages.outputs.skip_release != 'true'
     strategy:
       fail-fast: false
       matrix:
@@ -106,9 +185,9 @@ jobs:
           cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }}
 
   manifest:
-    needs: build
+    needs: [build, build-python-packages]
     runs-on: ubuntu-latest
-    if: github.event_name != 'pull_request'
+    if: github.event_name != 'pull_request' && needs.build-python-packages.outputs.skip_release != 'true'
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -146,8 +225,8 @@ jobs:
            phact/openrag-opensearch:$VERSION-amd64 \
            phact/openrag-opensearch:$VERSION-arm64
 
-          # Only update latest tags if version is numeric AND checkbox is checked
-          if [[ "$VERSION" =~ ^[0-9.-]+$ ]] && [[ "${{ github.event.inputs.update_latest }}" == "true" ]]; then
+          # Only update latest tags if version is numeric
+          if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
            echo "Updating latest tags for production release: $VERSION"
            docker buildx imagetools create -t phact/openrag-backend:latest \
              phact/openrag-backend:$VERSION-amd64 \
@@ -165,5 +244,5 @@ jobs:
            phact/openrag-opensearch:$VERSION-amd64 \
            phact/openrag-opensearch:$VERSION-arm64
          else
-            echo "Skipping latest tags - version: $VERSION, update_latest: ${{ github.event.inputs.update_latest }}"
+            echo "Skipping latest tags - version: $VERSION (not numeric)"
          fi
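The release gate above can be dry-run locally; a hedged sketch that reuses the workflow's own extraction and tag check:

```bash
# Same grep/cut extraction as the workflow's version step.
VERSION=$(grep '^version = ' pyproject.toml | cut -d '"' -f 2)
# git rev-parse succeeds only if the tag already exists in the clone.
if git rev-parse "v$VERSION" >/dev/null 2>&1; then
  echo "Tag v$VERSION exists; the workflow would skip the release"
else
  echo "Tag v$VERSION is new; the workflow would publish a release"
fi
```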
54  .github/workflows/test-integration.yml (vendored, new file)
@@ -0,0 +1,54 @@
+name: Integration Tests
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  tests:
+    runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-40gb]
+    env:
+      # Prefer repository/environment variable first, then secret, then a sane fallback
+      OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+    steps:
+      - run: df -h
+      #- name: "node-cleanup"
+      #  run: |
+      #    sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
+      #    sudo docker image prune --all --force
+      #    sudo docker builder prune -a
+      - run: df -h
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up UV
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: latest
+
+      - name: Python version
+        run: uv python install 3.13
+
+      - name: Install dependencies
+        run: uv sync
+
+      - name: Run integration tests
+        env:
+          OPENSEARCH_HOST: localhost
+          OPENSEARCH_PORT: 9200
+          OPENSEARCH_USERNAME: admin
+          OPENSEARCH_PASSWORD: ${{ env.OPENSEARCH_PASSWORD }}
+          LOG_LEVEL: DEBUG
+          # Force no-auth mode so tests bypass OAuth
+          GOOGLE_OAUTH_CLIENT_ID: ""
+          GOOGLE_OAUTH_CLIENT_SECRET: ""
+          # Disable startup ingest noise unless a test enables it
+          DISABLE_STARTUP_INGEST: "true"
+        run: |
+          make test-ci
+          echo "Keys directory after tests:"
+          ls -la keys/ || echo "No keys directory"
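To approximate this job on a workstation, the same environment can be passed straight to the `make test-ci` target the workflow invokes; the values below are placeholders, not real credentials:

```bash
# Empty OAuth client vars force no-auth mode, as in the workflow.
OPENSEARCH_PASSWORD='OpenRag#2025!' \
OPENAI_API_KEY=sk-your-key-here \
GOOGLE_OAUTH_CLIENT_ID="" \
GOOGLE_OAUTH_CLIENT_SECRET="" \
DISABLE_STARTUP_INGEST=true \
make test-ci
```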
2  .gitignore (vendored)
@@ -18,6 +18,8 @@ wheels/
 1001*.pdf
 *.json
 !flows/*.json
+!src/tui/_assets/flows/*.json
+!src/tui/_assets/flows/components/*.json
 .DS_Store
 
 config/
@@ -11,20 +11,48 @@ Thank you for your interest in contributing to OpenRAG! This guide will help you
 - Python 3.13+ with uv package manager
 - Node.js 18+ and npm
 
-### Environment Setup
+### Set up OpenRAG for development
+
+1. Set up your development environment.
 
 ```bash
-# Clone the repository
-git clone <repository-url>
+# Clone and setup environment
+git clone https://github.com/langflow-ai/openrag.git
 cd openrag
-
-# Setup development environment
 make setup  # Creates .env and installs dependencies
 ```
 
-### Configuration
+2. Configure the `.env` file with your API keys and credentials.
 
-Edit `.env` with your API keys and credentials. See the main README for required environment variables.
+```bash
+# Required
+OPENAI_API_KEY=your_openai_api_key
+OPENSEARCH_PASSWORD=your_secure_password
+LANGFLOW_SUPERUSER=admin
+LANGFLOW_SUPERUSER_PASSWORD=your_secure_password
+LANGFLOW_CHAT_FLOW_ID=your_chat_flow_id
+LANGFLOW_INGEST_FLOW_ID=your_ingest_flow_id
+NUDGES_FLOW_ID=your_nudges_flow_id
+```
+
+For extended configuration, including ingestion and optional variables, see [docs/reference/configuration.mdx](docs/docs/reference/configuration.mdx).
+
+3. Start OpenRAG.
+
+```bash
+# Full stack with GPU support
+make dev
+
+# Or CPU only
+make dev-cpu
+```
+
+Access the services:
+- **Frontend**: http://localhost:3000
+- **Backend API**: http://localhost:8000
+- **Langflow**: http://localhost:7860
+- **OpenSearch**: http://localhost:9200
+- **OpenSearch Dashboards**: http://localhost:5601
 
 ## 🔧 Development Commands
@@ -1,4 +1,4 @@
-FROM langflowai/langflow-nightly:1.6.3.dev0
+FROM langflowai/langflow-nightly:1.6.3.dev1
 
 EXPOSE 7860
1  MANIFEST.in (new file)
@@ -0,0 +1 @@
+recursive-include src/tui/_assets *
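One way to confirm the new rule takes effect, assuming a setuptools-style backend where `MANIFEST.in` governs sdist contents:

```bash
# Build, then check the source distribution for the bundled TUI assets.
uv build
tar -tzf dist/*.tar.gz | grep 'src/tui/_assets' || echo "assets not bundled"
```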
147  Makefile
@@ -1,7 +1,17 @@
 # OpenRAG Development Makefile
 # Provides easy commands for development workflow
 
-.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
+# Load variables from .env if present so `make` commands pick them up
+ifneq (,$(wildcard .env))
+include .env
+# Export all simple KEY=VALUE pairs to the environment for child processes
+export $(shell sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*\)=.*/\1/p' .env)
+endif
+
+.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install \
+	test test-integration test-ci \
+	backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os \
+	shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
 
 # Default target
 help:
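For illustration, what that `sed` pattern feeds to `export`; a sketch against a sample file (names here are examples only). Note that `include .env` supplies the values, while the `export` line only marks those names for export to child processes:

```bash
# Given a .env like this...
printf 'OPENSEARCH_PASSWORD=secret\nOPENAI_API_KEY=sk-123\n# a comment\n' > /tmp/example.env
# ...the pattern prints only well-formed KEY names, skipping comments:
sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*\)=.*/\1/p' /tmp/example.env
# Output:
# OPENSEARCH_PASSWORD
# OPENAI_API_KEY
```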
@@ -32,14 +42,16 @@ help:
 	@echo "  shell-lf    - Shell into langflow container"
 	@echo ""
 	@echo "Testing:"
-	@echo "  test        - Run backend tests"
+	@echo "  test             - Run all backend tests"
+	@echo "  test-integration - Run integration tests (requires infra)"
+	@echo "  test-ci          - Start infra, run integration tests, tear down"
 	@echo "  lint        - Run linting checks"
 	@echo ""
 
 # Development environments
 dev:
 	@echo "🚀 Starting OpenRAG with GPU support..."
-	docker-compose up -d
+	docker compose up -d
 	@echo "✅ Services started!"
 	@echo "  Backend:  http://localhost:8000"
 	@echo "  Frontend: http://localhost:3000"
@@ -49,7 +61,7 @@ dev:
 
 dev-cpu:
 	@echo "🚀 Starting OpenRAG with CPU only..."
-	docker-compose -f docker-compose-cpu.yml up -d
+	docker compose -f docker-compose-cpu.yml up -d
 	@echo "✅ Services started!"
 	@echo "  Backend:  http://localhost:8000"
 	@echo "  Frontend: http://localhost:3000"
@@ -59,7 +71,7 @@ dev-cpu:
 
 dev-local:
 	@echo "🔧 Starting infrastructure only (for local development)..."
-	docker-compose up -d opensearch dashboards langflow
+	docker compose up -d opensearch dashboards langflow
 	@echo "✅ Infrastructure started!"
 	@echo "  Langflow:   http://localhost:7860"
 	@echo "  OpenSearch: http://localhost:9200"
@@ -69,7 +81,7 @@ dev-local:
 
 infra:
 	@echo "🔧 Starting infrastructure services only..."
-	docker-compose up -d opensearch dashboards langflow
+	docker compose up -d opensearch dashboards langflow
 	@echo "✅ Infrastructure services started!"
 	@echo "  Langflow:   http://localhost:7860"
 	@echo "  OpenSearch: http://localhost:9200"
@@ -86,15 +98,15 @@ infra-cpu:
 # Container management
 stop:
 	@echo "🛑 Stopping all containers..."
-	docker-compose down
-	docker-compose -f docker-compose-cpu.yml down 2>/dev/null || true
+	docker compose down
+	docker compose -f docker-compose-cpu.yml down 2>/dev/null || true
 
 restart: stop dev
 
 clean: stop
 	@echo "🧹 Cleaning up containers and volumes..."
-	docker-compose down -v --remove-orphans
-	docker-compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
+	docker compose down -v --remove-orphans
+	docker compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
 	docker system prune -f
 
 # Local development
@@ -114,7 +126,7 @@ install: install-be install-fe
 
 install-be:
 	@echo "📦 Installing backend dependencies..."
-	uv sync
+	uv sync --extra torch-cu128
 
 install-fe:
 	@echo "📦 Installing frontend dependencies..."
@@ -123,7 +135,7 @@ install-fe:
 # Building
 build:
 	@echo "🔨 Building Docker images..."
-	docker-compose build
+	docker compose build
 
 build-be:
 	@echo "🔨 Building backend image..."
@@ -136,41 +148,124 @@ build-fe:
 # Logging and debugging
 logs:
 	@echo "📋 Showing all container logs..."
-	docker-compose logs -f
+	docker compose logs -f
 
 logs-be:
 	@echo "📋 Showing backend logs..."
-	docker-compose logs -f openrag-backend
+	docker compose logs -f openrag-backend
 
 logs-fe:
 	@echo "📋 Showing frontend logs..."
-	docker-compose logs -f openrag-frontend
+	docker compose logs -f openrag-frontend
 
 logs-lf:
 	@echo "📋 Showing langflow logs..."
-	docker-compose logs -f langflow
+	docker compose logs -f langflow
 
 logs-os:
 	@echo "📋 Showing opensearch logs..."
-	docker-compose logs -f opensearch
+	docker compose logs -f opensearch
 
 # Shell access
 shell-be:
 	@echo "🐚 Opening shell in backend container..."
-	docker-compose exec openrag-backend /bin/bash
+	docker compose exec openrag-backend /bin/bash
 
 shell-lf:
 	@echo "🐚 Opening shell in langflow container..."
-	docker-compose exec langflow /bin/bash
+	docker compose exec langflow /bin/bash
 
 shell-os:
 	@echo "🐚 Opening shell in opensearch container..."
-	docker-compose exec opensearch /bin/bash
+	docker compose exec opensearch /bin/bash
 
 # Testing and quality
 test:
-	@echo "🧪 Running backend tests..."
-	uv run pytest
+	@echo "🧪 Running all backend tests..."
+	uv run pytest tests/ -v
+
+test-integration:
+	@echo "🧪 Running integration tests (requires infrastructure)..."
+	@echo "💡 Make sure to run 'make infra' first!"
+	uv run pytest tests/integration/ -v
+
+# CI-friendly integration test target: brings up infra, waits, runs tests, tears down
+test-ci:
+	@set -e; \
+	echo "Installing test dependencies..."; \
+	uv sync --group dev; \
+	if [ ! -f keys/private_key.pem ]; then \
+		echo "Generating RSA keys for JWT signing..."; \
+		uv run python -c "from src.main import generate_jwt_keys; generate_jwt_keys()"; \
+	else \
+		echo "RSA keys already exist, ensuring correct permissions..."; \
+		chmod 600 keys/private_key.pem 2>/dev/null || true; \
+		chmod 644 keys/public_key.pem 2>/dev/null || true; \
+	fi; \
+	echo "Cleaning up old containers and volumes..."; \
+	docker compose -f docker-compose-cpu.yml down -v 2>/dev/null || true; \
+	echo "Pulling latest images..."; \
+	docker compose -f docker-compose-cpu.yml pull; \
+	echo "Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \
+	docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \
+	echo "Starting docling-serve..."; \
+	DOCLING_ENDPOINT=$$(uv run python scripts/docling_ctl.py start --port 5001 | grep "Endpoint:" | awk '{print $$2}'); \
+	echo "Docling-serve started at $$DOCLING_ENDPOINT"; \
+	echo "Waiting for backend OIDC endpoint..."; \
+	for i in $$(seq 1 60); do \
+		docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \
+	done; \
+	echo "Waiting for OpenSearch security config to be fully applied..."; \
+	for i in $$(seq 1 60); do \
+		if docker logs os 2>&1 | grep -q "Security configuration applied successfully"; then \
+			echo "✓ Security configuration applied"; \
+			break; \
+		fi; \
+		sleep 2; \
+	done; \
+	echo "Verifying OIDC authenticator is active in OpenSearch..."; \
+	AUTHC_CONFIG=$$(curl -k -s -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200/_opendistro/_security/api/securityconfig 2>/dev/null); \
+	if echo "$$AUTHC_CONFIG" | grep -q "openid_auth_domain"; then \
+		echo "✓ OIDC authenticator configured"; \
+		echo "$$AUTHC_CONFIG" | grep -A 5 "openid_auth_domain"; \
+	else \
+		echo "✗ OIDC authenticator NOT found in security config!"; \
+		echo "Security config:"; \
+		echo "$$AUTHC_CONFIG" | head -50; \
+		exit 1; \
+	fi; \
+	echo "Waiting for Langflow..."; \
+	for i in $$(seq 1 60); do \
+		curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \
+	done; \
+	echo "Waiting for docling-serve at $$DOCLING_ENDPOINT..."; \
+	for i in $$(seq 1 60); do \
+		curl -s $${DOCLING_ENDPOINT}/health >/dev/null 2>&1 && break || sleep 2; \
+	done; \
+	echo "Running integration tests"; \
+	LOG_LEVEL=$${LOG_LEVEL:-DEBUG} \
+	GOOGLE_OAUTH_CLIENT_ID="" \
+	GOOGLE_OAUTH_CLIENT_SECRET="" \
+	OPENSEARCH_HOST=localhost OPENSEARCH_PORT=9200 \
+	OPENSEARCH_USERNAME=admin OPENSEARCH_PASSWORD=$${OPENSEARCH_PASSWORD} \
+	DISABLE_STARTUP_INGEST=$${DISABLE_STARTUP_INGEST:-true} \
+	uv run pytest tests/integration -vv -s -o log_cli=true --log-cli-level=DEBUG; \
+	TEST_RESULT=$$?; \
+	echo ""; \
+	echo "=== Post-test JWT diagnostics ==="; \
+	echo "Generating test JWT token..."; \
+	TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))" 2>/dev/null || echo ""); \
+	if [ -n "$$TEST_TOKEN" ]; then \
+		echo "Testing JWT against OpenSearch..."; \
+		HTTP_CODE=$$(curl -k -s -w "%{http_code}" -o /tmp/os_diag.txt -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1); \
+		echo "HTTP $$HTTP_CODE: $$(cat /tmp/os_diag.txt | head -c 150)"; \
+	fi; \
+	echo "================================="; \
+	echo ""; \
+	echo "Tearing down infra"; \
+	uv run python scripts/docling_ctl.py stop || true; \
+	docker compose down -v || true; \
+	exit $$TEST_RESULT
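Typical use of the new targets, per the help text added above; `test-ci` is self-contained, while `test-integration` assumes infrastructure is already up:

```bash
make test-ci            # start infra, run integration tests, tear down
make infra              # or bring up OpenSearch, Dashboards, and Langflow first,
make test-integration   # then run the integration suite against them
```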
 
 lint:
 	@echo "🔍 Running linting checks..."
@@ -180,19 +275,19 @@ lint:
 # Service status
 status:
 	@echo "📊 Container status:"
-	@docker-compose ps 2>/dev/null || echo "No containers running"
+	@docker compose ps 2>/dev/null || echo "No containers running"
 
 health:
 	@echo "🏥 Health check:"
 	@echo "Backend: $$(curl -s http://localhost:8000/health 2>/dev/null || echo 'Not responding')"
 	@echo "Langflow: $$(curl -s http://localhost:7860/health 2>/dev/null || echo 'Not responding')"
-	@echo "OpenSearch: $$(curl -s -k -u admin:$(shell grep OPENSEARCH_PASSWORD .env | cut -d= -f2) https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
+	@echo "OpenSearch: $$(curl -s -k -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
 
 # Database operations
 db-reset:
 	@echo "🗄️ Resetting OpenSearch indices..."
-	curl -X DELETE "http://localhost:9200/documents" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
-	curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
+	curl -X DELETE "http://localhost:9200/documents" -u admin:$${OPENSEARCH_PASSWORD} || true
+	curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$${OPENSEARCH_PASSWORD} || true
 	@echo "Indices reset. Restart backend to recreate."
 
 # Flow management
208  README.md
@@ -2,20 +2,6 @@
 # OpenRAG
 
-</div>
-<div align="center">
-  <a href="#quick-start" style="color: #0366d6;">🚀 Quick Start</a> |
-  <a href="#tui-interface" style="color: #0366d6;">💻 TUI Interface</a> |
-  <a href="#docker-deployment" style="color: #0366d6;">🐳 Docker Deployment</a> |
-  <a href="#development" style="color: #0366d6;">⚙️ Development</a> |
-  <a href="#troubleshooting" style="color: #0366d6;">🔧 Troubleshooting</a>
-</div>
-
-
-OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration. [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/phact/openrag)
-
-
 <div align="center">
   <a href="https://github.com/langflow-ai/langflow"><img src="https://img.shields.io/badge/Langflow-1C1C1E?style=flat&logo=langflow" alt="Langflow"></a>
@@ -24,144 +10,124 @@ OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables
   <a href="https://github.com/encode/starlette"><img src="https://img.shields.io/badge/Starlette-009639?style=flat&logo=fastapi&logoColor=white" alt="Starlette"></a>
   <a href="https://github.com/vercel/next.js"><img src="https://img.shields.io/badge/Next.js-000000?style=flat&logo=next.js&logoColor=white" alt="Next.js"></a>
+  <a href="https://deepwiki.com/phact/openrag"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
 </div>
 
+OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration.
+
+</div>
+<div align="center">
+  <a href="#quickstart" style="color: #0366d6;">Quickstart</a> |
+  <a href="#tui-interface" style="color: #0366d6;">TUI Interface</a> |
+  <a href="#docker-deployment" style="color: #0366d6;">Docker Deployment</a> |
+  <a href="#development" style="color: #0366d6;">Development</a> |
+  <a href="#troubleshooting" style="color: #0366d6;">Troubleshooting</a>
+</div>
+
+## Quickstart
+
+Use the OpenRAG Terminal User Interface (TUI) to manage your OpenRAG installation without complex command-line operations.
+
+To launch OpenRAG with the TUI, do the following:
+
-## 🚀 Quick Start
+1. Clone the OpenRAG repository.
+
+```bash
+git clone https://github.com/langflow-ai/openrag.git
+cd openrag
+```
+
-### Prerequisites
+2. To start the TUI, from the repository root, run:
+
+```bash
+# Install dependencies first
+uv sync
+
-- Docker or Podman with Compose installed
+# Launch the TUI
-- Make (for development commands)
+uv run openrag
+```
+
-### 1. Environment Setup
+The TUI opens and guides you through OpenRAG setup.
 
-```bash
+For the full TUI guide, see [TUI](docs/docs/get-started/tui.mdx).
-# Clone and setup environment
-git clone https://github.com/langflow-ai/openrag.git
-cd openrag
-make setup  # Creates .env and installs dependencies
-```
 
-### 2. Configure Environment
+## Docker Deployment
 
-Edit `.env` with your API keys and credentials:
+If you prefer to use Docker to run OpenRAG, the repository includes two Docker Compose `.yml` files.
+They deploy the same applications and containers, but to different environments.
 
-```bash
+- [`docker-compose.yml`](https://github.com/langflow-ai/openrag/blob/main/docker-compose.yml) is an OpenRAG deployment for environments with GPU support. GPU support requires an NVIDIA GPU with CUDA support and compatible NVIDIA drivers installed on the OpenRAG host machine.
-# Required
-OPENAI_API_KEY=your_openai_api_key
-OPENSEARCH_PASSWORD=your_secure_password
-LANGFLOW_SUPERUSER=admin
-LANGFLOW_SUPERUSER_PASSWORD=your_secure_password
-LANGFLOW_CHAT_FLOW_ID=your_chat_flow_id
-LANGFLOW_INGEST_FLOW_ID=your_ingest_flow_id
-NUDGES_FLOW_ID=your_nudges_flow_id
-```
-See extended configuration, including ingestion and optional variables: [docs/reference/configuration.md](docs/docs/reference/configuration.md)
-### 3. Start OpenRAG
-
-```bash
+- [`docker-compose-cpu.yml`](https://github.com/langflow-ai/openrag/blob/main/docker-compose-cpu.yml) is a CPU-only version of OpenRAG for systems without GPU support. Use this Docker Compose file for environments where GPU drivers aren't available.
-# Full stack with GPU support
-make dev
-
-# Or CPU only
+Both Docker deployments depend on `docling serve` to be running on port `5001` on the host machine. This enables [Mac MLX](https://opensource.apple.com/projects/mlx/) support for document processing. Installing OpenRAG with the TUI starts `docling serve` automatically, but for a Docker deployment you must manually start the `docling serve` process.
-make dev-cpu
-```
 
-Access the services:
+To deploy OpenRAG with Docker:
-- **Frontend**: http://localhost:3000
-- **Backend API**: http://localhost:8000
-- **Langflow**: http://localhost:7860
-- **OpenSearch**: http://localhost:9200
-- **OpenSearch Dashboards**: http://localhost:5601
 
-## 🖥️ TUI Interface
+1. Clone the OpenRAG repository.
+
+```bash
+git clone https://github.com/langflow-ai/openrag.git
+cd openrag
+```
+
-OpenRAG includes a powerful Terminal User Interface (TUI) for easy setup, configuration, and monitoring. The TUI provides a user-friendly way to manage your OpenRAG installation without complex command-line operations.
+2. Install dependencies.
+
+```bash
+uv sync
+```
+
-![…](…)
+3. Start `docling serve` on the host machine.
+
+```bash
+uv run python scripts/docling_ctl.py start --port 5001
+```
+
-### Launching the TUI
+4. Confirm `docling serve` is running.
+
+```
+uv run python scripts/docling_ctl.py status
+```
+
-```bash
+Successful result:
-# Install dependencies first
+```bash
-uv sync
+Status: running
+Endpoint: http://127.0.0.1:5001
+Docs: http://127.0.0.1:5001/docs
+PID: 27746
+```
+
-# Launch the TUI
+5. Build and start all services.
-uv run openrag
-```
 
-### TUI Features
+For the GPU-accelerated deployment, run:
+
+```bash
+docker compose build
+docker compose up -d
+```
+
-See the full TUI guide for features, navigation, and benefits: [docs/get-started/tui.mdx](docs/docs/get-started/tui.mdx)
+For environments without GPU support, run:
+
+```bash
+docker compose -f docker-compose-cpu.yml up -d
+```
+
+The OpenRAG Docker Compose file starts five containers:
+
+| Container Name | Default Address | Purpose |
+|---|---|---|
+| OpenRAG Backend | http://localhost:8000 | FastAPI server and core functionality. |
+| OpenRAG Frontend | http://localhost:3000 | React web interface for users. |
+| Langflow | http://localhost:7860 | AI workflow engine and flow management. |
+| OpenSearch | http://localhost:9200 | Vector database for document storage. |
+| OpenSearch Dashboards | http://localhost:5601 | Database administration interface. |
+
+6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](docs/docs/get-started/quickstart.mdx).
+
+To stop `docling serve`, run:
+
-## 🐳 Docker Deployment
+```bash
+uv run python scripts/docling_ctl.py stop
+```
+
-### Standard Deployment
+For more information, see [Deploy with Docker](docs/docs/get-started/docker.mdx).
 
-```bash
+## Troubleshooting
-# Build and start all services
-docker compose build
-docker compose up -d
-```
 
-### CPU-Only Deployment
+For common issues and fixes, see [Troubleshoot](docs/docs/support/troubleshoot.mdx).
 
-For environments without GPU support:
+## Development
 
-```bash
+For developers wanting to contribute to OpenRAG or set up a development environment, see [CONTRIBUTING.md](CONTRIBUTING.md).
-docker compose -f docker-compose-cpu.yml up -d
-```
-
-More deployment commands and tips: [docs/get-started/docker.mdx](docs/docs/get-started/docker.mdx)
-
-## 🔧 Troubleshooting
-
-### Podman on macOS
-
-If using Podman on macOS, you may need to increase VM memory:
-
-```bash
-podman machine stop
-podman machine rm
-podman machine init --memory 8192  # 8 GB example
-podman machine start
-```
-
-### Common Issues
-
-See common issues and fixes: [docs/support/troubleshoot.mdx](docs/docs/reference/troubleshoot.mdx)
-
-## 🛠️ Development
-
-For developers wanting to contribute to OpenRAG or set up a development environment, please see our comprehensive development guide:
-
-**[📚 See CONTRIBUTING.md for detailed development instructions](CONTRIBUTING.md)**
-
-The contributing guide includes:
-- Complete development environment setup
-- Local development workflows
-- Testing and debugging procedures
-- Code style guidelines
-- Architecture overview
-- Pull request guidelines
-
-### Quick Development Commands
-
-```bash
-make help      # See all available commands
-make setup     # Initial development setup
-make infra     # Start infrastructure services
-make backend   # Run backend locally
-make frontend  # Run frontend locally
-```
@ -74,7 +74,7 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./documents:/app/documents:Z
|
- ./documents:/app/documents:Z
|
||||||
- ./keys:/app/keys:Z
|
- ./keys:/app/keys:Z
|
||||||
- ./flows:/app/flows:Z
|
- ./flows:/app/flows:U,z
|
||||||
|
|
||||||
openrag-frontend:
|
openrag-frontend:
|
||||||
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
|
image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
|
||||||
|
|
@ -91,7 +91,7 @@ services:
|
||||||
|
|
||||||
langflow:
|
langflow:
|
||||||
volumes:
|
volumes:
|
||||||
- ./flows:/app/flows:Z
|
- ./flows:/app/flows:U,z
|
||||||
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
|
image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
|
||||||
# build:
|
# build:
|
||||||
# context: .
|
# context: .
|
||||||
|
|
@ -108,6 +108,7 @@ services:
|
||||||
- OWNER_NAME=None
|
- OWNER_NAME=None
|
||||||
- OWNER_EMAIL=None
|
- OWNER_EMAIL=None
|
||||||
- CONNECTOR_TYPE=system
|
- CONNECTOR_TYPE=system
|
||||||
|
- CONNECTOR_TYPE_URL=url
|
||||||
- OPENRAG-QUERY-FILTER="{}"
|
- OPENRAG-QUERY-FILTER="{}"
|
||||||
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
|
- OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
|
||||||
- FILENAME=None
|
- FILENAME=None
|
||||||
|
|
|
||||||
|
|
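Context on the mount-flag change: `z` applies a shared SELinux relabel (the old `Z` is a private relabel), and `U`, a Podman option, additionally chowns the mounted tree to the container user, which matters for rootless setups. A standalone sketch of the same flags outside Compose:

```bash
# Podman: shared SELinux label plus ownership fix-up on ./flows.
podman run --rm -v ./flows:/app/flows:U,z alpine ls -ln /app/flows
```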
@@ -73,7 +73,7 @@ services:
     volumes:
       - ./documents:/app/documents:Z
       - ./keys:/app/keys:Z
-      - ./flows:/app/flows:z
+      - ./flows:/app/flows:U,z
     gpus: all
 
   openrag-frontend:
@@ -81,7 +81,6 @@ services:
     # build:
     #   context: .
     #   dockerfile: Dockerfile.frontend
-    #dockerfile: Dockerfile.frontend
     container_name: openrag-frontend
     depends_on:
       - openrag-backend
@@ -92,7 +91,7 @@ services:
   langflow:
     volumes:
-      - ./flows:/app/flows:z
+      - ./flows:/app/flows:U,z
     image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
     # build:
     #   context: .
@@ -109,6 +108,7 @@ services:
      - OWNER_NAME=None
      - OWNER_EMAIL=None
      - CONNECTOR_TYPE=system
+      - CONNECTOR_TYPE_URL=url
      - OPENRAG-QUERY-FILTER="{}"
      - FILENAME=None
      - MIMETYPE=None
@@ -1,4 +0,0 @@
-:::info
-OpenRAG is currently in public preview.
-Development is ongoing, and the features and functionality are subject to change.
-:::
@@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 OpenRAG leverages Langflow's Agent component to power the OpenRAG OpenSearch Agent flow.
 
@@ -34,11 +31,11 @@ In an agentic context, tools are functions that the agent can run to perform tas
 
 </details>
 
-## Use the OpenRAG OpenSearch Agent flow
+## Use the OpenRAG OpenSearch Agent flow {#flow}
 
 If you've chatted with your knowledge in OpenRAG, you've already experienced the OpenRAG OpenSearch Agent chat flow.
 To switch OpenRAG over to the [Langflow visual editor](https://docs.langflow.org/concepts-overview) and view the OpenRAG OpenSearch Agentflow, click <Icon name="Settings2" aria-hidden="true"/> **Settings**, and then click **Edit in Langflow**.
-This flow contains seven components connected together to chat with your data:
+This flow contains eight components connected together to chat with your data:
 
 * The [**Agent** component](https://docs.langflow.org/agents) orchestrates the entire flow by deciding when to search the knowledge base, how to formulate search queries, and how to combine retrieved information with the user's question to generate a comprehensive response.
 The **Agent** behaves according to the prompt in the **Agent Instructions** field.
@@ -49,6 +46,7 @@ The **Agent** behaves according to the prompt in the **Agent Instructions** fiel
 * The [**Text Input** component](https://docs.langflow.org/components-io) is populated with the global variable `OPENRAG-QUERY-FILTER`.
 This filter is the [Knowledge filter](/knowledge#create-knowledge-filters), and filters which knowledge sources to search through.
 * The **Agent** component's Output port is connected to the [**Chat Output** component](https://docs.langflow.org/components-io), which returns the final response to the user or application.
+* An [**MCP Tools** component](https://docs.langflow.org/mcp-client) is connected to the Agent's **Tools** port. This component calls the [OpenSearch URL Ingestion flow](/ingestion#url-flow), which Langflow uses as an MCP server to fetch content from URLs and store it in OpenSearch.
 
 <PartialModifyFlows />
@@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 OpenRAG uses [Docling](https://docling-project.github.io/docling/) for its document ingestion pipeline.
 More specifically, OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), which starts a `docling-serve` process on your local machine and runs Docling ingestion through an API service.
@@ -51,3 +48,30 @@ If you want to use OpenRAG's built-in pipeline instead of Docling serve, set `DI
 The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API.
 
 For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
+
+## Knowledge ingestion flows
+
+[Flows](https://docs.langflow.org/concepts-overview) in Langflow are functional representations of application workflows, with multiple [component](https://docs.langflow.org/concepts-components) nodes connected as single steps in a workflow.
+
+The **OpenSearch Ingestion** flow is the default knowledge ingestion flow in OpenRAG: when you **Add Knowledge** in OpenRAG, you run the OpenSearch Ingestion flow in the background. The flow ingests documents using **Docling Serve** to import and process documents.
+
+This flow contains ten components connected together to process and store documents in your knowledge base.
+
+* The [**Docling Serve** component](https://docs.langflow.org/bundles-docling) processes input documents by connecting to your instance of Docling Serve.
+* The [**Export DoclingDocument** component](https://docs.langflow.org/components-docling) exports the processed DoclingDocument to Markdown format with image export mode set to placeholder. This conversion turns the structured document data into a standardized format for further processing.
+* Three [**DataFrame Operations** components](https://docs.langflow.org/components-processing#dataframe-operations) sequentially add the metadata columns `filename`, `file_size`, and `mimetype` to the document data.
+* The [**Split Text** component](https://docs.langflow.org/components-processing#split-text) splits the processed text into chunks with a chunk size of 1000 characters and an overlap of 200 characters.
+* Four **Secret Input** components provide secure access to configuration variables: `CONNECTOR_TYPE`, `OWNER`, `OWNER_EMAIL`, and `OWNER_NAME`. These are runtime variables populated from OAuth login.
+* The **Create Data** component combines the secret inputs into a structured data object that is associated with the document embeddings.
+* The [**Embedding Model** component](https://docs.langflow.org/components-embedding-models) generates vector embeddings using OpenAI's `text-embedding-3-small` model. The embedding model is selected at application onboarding and cannot be changed.
+* The [**OpenSearch** component](https://docs.langflow.org/bundles-elastic#opensearch) stores the processed documents and their embeddings in the `documents` index at `https://opensearch:9200`. By default, the component authenticates with a JWT token, but you can also select `basic` auth mode and enter your OpenSearch admin username and password.
+
+<PartialModifyFlows />
+
+### OpenSearch URL Ingestion flow {#url-flow}
+
+An additional knowledge ingestion flow is included in OpenRAG, where it is used as an MCP tool by the [**OpenSearch Agent flow**](/agents#flow).
+The agent calls this component to fetch web content, and the results are ingested into OpenSearch.
+
+For more on using MCP clients in Langflow, see [MCP clients](https://docs.langflow.org/mcp-client).\
+To connect additional MCP servers to the MCP client, see [Connect to MCP servers from your application](https://docs.langflow.org/mcp-tutorial).
@ -7,17 +7,23 @@ import Icon from "@site/src/components/icon/icon";
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
|
import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
|
||||||
import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
|
|
||||||
|
|
||||||
<PartialExternalPreview />
|
|
||||||
|
|
||||||
OpenRAG uses [OpenSearch](https://docs.opensearch.org/latest/) for its vector-backed knowledge store.
|
OpenRAG uses [OpenSearch](https://docs.opensearch.org/latest/) for its vector-backed knowledge store.
|
||||||
This is a specialized database for storing and retrieving embeddings, which helps your Agent efficiently find relevant information.
|
This is a specialized database for storing and retrieving embeddings, which helps your Agent efficiently find relevant information.
OpenSearch provides powerful hybrid search capabilities with enterprise-grade security and multi-tenancy support.

+## Authentication and document access {#auth}
+
+OpenRAG supports two authentication modes based on how you [install OpenRAG](/install), and which mode you choose affects document access.
+
+**No-auth mode (Basic Setup)**: This mode uses a single anonymous JWT token for OpenSearch authentication, so documents uploaded to the `documents` index by one user are visible to all other users on the OpenRAG server.
+
+**OAuth mode (Advanced Setup)**: Each OpenRAG user is granted a JWT token, and each document is tagged with user ownership. Documents are filtered by user ownership, ensuring users only see documents they uploaded or have access to.
+
## Ingest knowledge

OpenRAG supports knowledge ingestion through direct file uploads and OAuth connectors.

+To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
+
### Direct file ingestion

@@ -78,18 +84,6 @@ You can select multiples.
The ingestion process may take some time, depending on the size of your documents.
4. When ingestion is complete, your documents are available in the Knowledge screen.
-
-### Sync cloud connectors
-
-Your connected data sources are found in the <Icon name="Settings2" aria-hidden="true"/> **Settings** page.
-
-When you click **Sync Now** for a connected cloud service like Google Drive, OpenRAG scans your connected Google Drive account to find files that match your sync criteria. Sync criteria are controlled in **Sync Settings** on the same page. You can sync all files, or select a maximum number of files to sync.
-
-For each file found, OpenRAG downloads, converts, and embeds the processed content into OpenSearch.
-
-You can monitor the sync progress in the <Icon name="Bell" aria-hidden="true"/> **Tasks** sidebar.
-
-Once processing is complete, the synced documents become available in your knowledge base and can be searched through the chat interface or Knowledge page.
-
## Explore knowledge

The **Knowledge** page lists the documents OpenRAG has ingested into the OpenSearch vector database's `documents` index.

@@ -101,10 +95,6 @@ Documents are processed with the default **Knowledge Ingest** flow, so if you wa
<PartialModifyFlows />
-
-### Knowledge ingestion settings
-
-To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
-
## Create knowledge filters

OpenRAG includes a knowledge filter system for organizing and managing document collections.
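The ownership filtering added above can be pictured as a plain OpenSearch boolean query. The sketch below is illustrative rather than OpenRAG's actual implementation: the `documents` index and the `owner_email` metadata key come from this change set, while the connection settings, the text field name, and the example user are assumptions.

import os

from opensearchpy import OpenSearch  # assumes the opensearch-py client

client = OpenSearch(
    hosts=[{"host": "localhost", "port": 9200}],
    http_auth=("admin", os.environ["OPENSEARCH_PASSWORD"]),  # placeholder credentials
    use_ssl=True,
    verify_certs=False,
)

# OAuth mode: restrict hits to documents owned by the requesting user.
query = {
    "query": {
        "bool": {
            "must": [{"match": {"text": "quarterly report"}}],  # "text" field is assumed
            "filter": [{"term": {"owner_email": "user@example.com"}}],  # hypothetical user
        }
    }
}
hits = client.search(index="documents", body=query)
print(hits["hits"]["total"])

In no-auth mode the same query would simply omit the `filter` clause, which is why every user on the server sees every document.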
@@ -4,9 +4,6 @@ slug: /get-started/docker
---

import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />

There are two different Docker Compose files.
They deploy the same applications and containers, but to different environments.
@@ -6,9 +6,6 @@ slug: /install
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />

[Install the OpenRAG Python wheel](#install-python-wheel), and then run the [OpenRAG Terminal User Interface (TUI)](#setup) to start your OpenRAG deployment with a guided setup process.

@@ -65,13 +62,15 @@ The OpenRAG wheel installs the Terminal User Interface (TUI) for configuring and
## Set up OpenRAG with the TUI {#setup}

The TUI creates a `.env` file in your OpenRAG directory root and starts OpenRAG.
+If the TUI detects a `.env` file in the OpenRAG root directory, it sources any variables from the `.env` file.
+If the TUI detects OAuth credentials, it enforces the **Advanced Setup** path.

-**Basic Setup** generates all of the required values except the OpenAI API key.
+**Basic Setup** generates all of the required values for OpenRAG except the OpenAI API key.
-**Basic Setup** does not set up OAuth connections for ingestion from Google Drive, OneDrive, or AWS.
+**Basic Setup** does not set up OAuth connections for ingestion from cloud providers.
For OAuth setup, use **Advanced Setup**.

-If the TUI detects OAuth credentials, it enforces the **Advanced Setup** path.
-If the TUI detects a `.env` file in the OpenRAG root directory, it will source any variables from the `.env` file.
+**Basic Setup** and **Advanced Setup** enforce the same authentication settings for the Langflow server, but manage document access differently. For more information, see [Authentication and document access](/knowledge#auth).

<Tabs groupId="Setup method">
<TabItem value="Basic setup" label="Basic setup" default>

@@ -90,6 +89,7 @@ If the TUI detects a `.env` file in the OpenRAG root directory, it will source a
7. Continue with [Application Onboarding](#application-onboarding).
</TabItem>
<TabItem value="Advanced setup" label="Advanced setup">

1. To install OpenRAG with **Advanced Setup**, click **Advanced Setup** or press <kbd>2</kbd>.
2. Click **Generate Passwords** to generate passwords for OpenSearch and Langflow.
3. Paste your OpenAI API key in the OpenAI API key field.
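As a rough illustration of the `.env` sourcing behavior described above: python-dotenv, the same mechanism the backend adopts elsewhere in this change set with `load_dotenv(override=False)`, loads variables without clobbering anything already exported in the shell. The `.env` path is assumed to be the OpenRAG root, and `OPENAI_API_KEY` is used here only because the TUI prompts for an OpenAI API key.

import os

from dotenv import load_dotenv  # assumes python-dotenv is installed

# override=False: values already present in the environment win over .env entries.
load_dotenv(".env", override=False)
print(bool(os.getenv("OPENAI_API_KEY")))  # True once the TUI has written the key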
@@ -6,9 +6,6 @@ slug: /quickstart
import Icon from "@site/src/components/icon/icon";
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />

Get started with OpenRAG by loading your knowledge, swapping out your language model, and then chatting with the OpenRAG API.
@@ -3,10 +3,6 @@ title: Terminal User Interface (TUI) commands
slug: /get-started/tui
---

-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
-
The OpenRAG Terminal User Interface (TUI) allows you to set up, configure, and monitor your OpenRAG deployment directly from the terminal, on any operating system.


@@ -3,10 +3,6 @@ title: What is OpenRAG?
slug: /
---

-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
-
OpenRAG is an open-source package for building agentic RAG systems.
It supports integration with a wide range of orchestration tools, vector databases, and LLM providers.
@@ -5,9 +5,6 @@ slug: /support/troubleshoot

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />

This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.
BIN docs/static/img/opensearch-agent-flow.png (binary file not shown; 951 KiB before, 1,004 KiB after)

File diff suppressed because one or more lines are too long
@@ -232,6 +232,7 @@
 },
 {
 "animated": false,
+"className": "",
 "data": {
 "sourceHandle": {
 "dataType": "EmbeddingModel",

@@ -733,6 +734,10 @@
 {
 "key": "owner_email",
 "value": "OWNER_EMAIL"
+},
+{
+"key": "connector_type",
+"value": "CONNECTOR_TYPE_URL"
 }
 ]
 },

@@ -1808,7 +1813,7 @@
 ],
 "frozen": false,
 "icon": "table",
-"last_updated": "2025-10-03T20:31:36.023Z",
+"last_updated": "2025-10-06T17:46:55.068Z",
 "legacy": false,
 "lf_version": "1.6.0",
 "metadata": {

@@ -2224,7 +2229,7 @@
 ],
 "frozen": false,
 "icon": "table",
-"last_updated": "2025-10-03T20:31:36.025Z",
+"last_updated": "2025-10-06T17:46:55.069Z",
 "legacy": false,
 "lf_version": "1.6.0",
 "metadata": {

@@ -2897,7 +2902,7 @@
 ],
 "frozen": false,
 "icon": "table",
-"last_updated": "2025-10-03T20:31:36.026Z",
+"last_updated": "2025-10-06T17:46:55.069Z",
 "legacy": false,
 "metadata": {
 "code_hash": "b4d6b19b6eef",

@@ -3310,7 +3315,7 @@
 ],
 "frozen": false,
 "icon": "binary",
-"last_updated": "2025-10-03T20:31:47.177Z",
+"last_updated": "2025-10-06T17:46:54.996Z",
 "legacy": false,
 "metadata": {
 "code_hash": "8607e963fdef",

@@ -3595,17 +3600,17 @@
 }
 ],
 "viewport": {
-"x": -407.1633937626607,
+"x": -538.2311610019549,
-"y": -577.5291936220412,
+"y": -337.3313239657308,
-"zoom": 0.5347553210574026
+"zoom": 0.45546556043892106
 }
 },
 "description": "This flow is to ingest the URL to open search.",
 "endpoint_name": null,
-"mcp_enabled": true,
 "id": "72c3d17c-2dac-4a73-b48a-6518473d7830",
+"mcp_enabled": true,
 "is_component": false,
-"last_tested_version": "1.6.0",
+"last_tested_version": "1.6.3.dev1",
 "name": "OpenSearch URL Ingestion Flow",
 "tags": [
 "openai",
@@ -74,7 +74,7 @@ export const KnowledgeSearchInput = () => {
 {queryOverride && (
   <Button
     variant="ghost"
-    className="h-full !px-1.5 !py-0"
+    className="h-full rounded-sm !px-1.5 !py-0"
     type="button"
     onClick={() => {
       setSearchQueryInput("");

@@ -87,7 +87,7 @@ export const KnowledgeSearchInput = () => {
 <Button
   variant="ghost"
   className={cn(
-    "h-full !px-1.5 !py-0 hidden group-focus-within/input:block",
+    "h-full rounded-sm !px-1.5 !py-0 hidden group-focus-within/input:block",
     searchQueryInput && "block"
   )}
   type="submit"
@@ -92,6 +92,7 @@ export default function ConnectorsPage() {
   selectedFiles={selectedFiles}
   isAuthenticated={false} // This would come from auth context in real usage
   accessToken={undefined} // This would come from connected account
+  isIngesting={isSyncing}
 />
 </div>
@@ -5,14 +5,9 @@ import { useRouter, useSearchParams } from "next/navigation";
 import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
 // import { Label } from "@/components/ui/label";
 // import { Checkbox } from "@/components/ui/checkbox";
-import { filterAccentClasses } from "@/components/knowledge-filter-panel";
 import { ProtectedRoute } from "@/components/protected-route";
 import { Button } from "@/components/ui/button";
-import { Checkbox } from "@/components/ui/checkbox";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
 import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
-import { useTask } from "@/contexts/task-context";
 import {
   type ChunkResult,
   type File,

@@ -35,9 +30,9 @@ function ChunksPageContent() {
 const { parsedFilterData, queryOverride } = useKnowledgeFilter();
 const filename = searchParams.get("filename");
 const [chunks, setChunks] = useState<ChunkResult[]>([]);
-const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
-  ChunkResult[]
->([]);
+// const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
+//   ChunkResult[]
+// >([]);
 // const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
 const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
   number | null

@@ -83,13 +78,13 @@ function ChunksPageContent() {
 }, [data, filename]);

 // Set selected state for all checkboxes when selectAll changes
-useEffect(() => {
-  if (selectAll) {
-    setSelectedChunks(new Set(chunks.map((_, index) => index)));
-  } else {
-    setSelectedChunks(new Set());
-  }
-}, [selectAll, setSelectedChunks, chunks]);
+// useEffect(() => {
+//   if (selectAll) {
+//     setSelectedChunks(new Set(chunks.map((_, index) => index)));
+//   } else {
+//     setSelectedChunks(new Set());
+//   }
+// }, [selectAll, setSelectedChunks, chunks]);

 const handleBack = useCallback(() => {
   router.push("/knowledge");

@@ -126,26 +121,25 @@ function ChunksPageContent() {
 return (
   <div className="flex flex-col h-full">
-    <div className="flex flex-col h-full">
     {/* Header */}
     <div className="flex flex-col mb-6">
       <div className="flex items-center gap-3 mb-6">
         <Button
           variant="ghost"
           onClick={handleBack}
           size="sm"
           className="max-w-8 max-h-8 -m-2"
         >
           <ArrowLeft size={24} />
         </Button>
         <h1 className="text-lg font-semibold">
           {/* Removes file extension from filename */}
           {filename.replace(/\.[^/.]+$/, "")}
         </h1>
       </div>
       <div className="flex flex-1">
         <KnowledgeSearchInput />
         {/* <div className="flex items-center pl-4 gap-2">
           <Checkbox
             id="selectAllChunks"
             checked={selectAll}

@@ -160,11 +154,12 @@ function ChunksPageContent() {
         Select all
       </Label>
     </div> */}
-    </div>
     </div>
+    </div>

-    {/* Content Area - matches knowledge page structure */}
-    <div className="flex-1 overflow-auto pr-6">
+    <div className="grid gap-6 grid-cols-1 lg:grid-cols-[3fr_1fr]">
+      {/* Content Area */}
+      <div className="row-start-2 lg:row-start-1">
       {isFetching ? (
         <div className="flex items-center justify-center h-64">
           <div className="text-center">

@@ -185,7 +180,7 @@ function ChunksPageContent() {
       </div>
     ) : (
       <div className="space-y-4 pb-6">
-        {chunksFilteredByQuery.map((chunk, index) => (
+        {chunks.map((chunk, index) => (
           <div
             key={chunk.filename + index}
             className="bg-muted rounded-lg p-4 border border-border/50"

@@ -242,31 +237,30 @@ function ChunksPageContent() {
         </div>
       )}
     </div>
-    </div>
     {/* Right panel - Summary (TODO), Technical details, */}
     {chunks.length > 0 && (
-      <div className="w-[320px] py-20 px-2">
+      <div className="min-w-[200px]">
       <div className="mb-8">
-        <h2 className="text-xl font-semibold mt-3 mb-4">
-          Technical details
-        </h2>
+        <h2 className="text-xl font-semibold mb-4">Technical details</h2>
         <dl>
           <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">
               Total chunks
             </dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {chunks.length}
             </dd>
           </div>
           <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
-            <dt className="text-sm/6 text-muted-foreground">Avg length</dt>
+            <dt className="text-sm/6 text-muted-foreground">
+              Avg length
+            </dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {averageChunkLength.toFixed(0)} chars
             </dd>
           </div>
           {/* TODO: Uncomment after data is available */}
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Process time</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
             </dd>

@@ -276,54 +270,55 @@ function ChunksPageContent() {
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
             </dd>
           </div> */}
         </dl>
       </div>
-      <div className="mb-8">
+      <div className="mb-4">
         <h2 className="text-xl font-semibold mt-2 mb-3">
           Original document
         </h2>
         <dl>
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Name</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {fileData?.filename}
             </dd>
           </div> */}
           <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Type</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
             </dd>
           </div>
           <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Size</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               {fileData?.size
                 ? `${Math.round(fileData.size / 1024)} KB`
                 : "Unknown"}
             </dd>
           </div>
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               N/A
             </dd>
           </div> */}
           {/* TODO: Uncomment after data is available */}
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Source</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
           </div> */}
           {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
             <dt className="text-sm/6 text-muted-foreground">Updated</dt>
             <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               N/A
             </dd>
           </div> */}
         </dl>
+      </div>
       </div>
       </div>
     )}
   </div>
 );
 }
@@ -85,6 +85,7 @@ interface Connector {
   connectionId?: string;
   access_token?: string;
   selectedFiles?: GoogleDriveFile[] | OneDriveFile[];
+  available?: boolean;
 }

 interface SyncResult {
@@ -165,7 +165,7 @@ export default function UploadProviderPage() {

 const handleFileSelected = (files: CloudFile[]) => {
   setSelectedFiles(files);
-  console.log(`Selected ${files.length} files from ${provider}:`, files);
+  console.log(`Selected ${files.length} item(s) from ${provider}:`, files);
   // You can add additional handling here like triggering sync, etc.
 };

@@ -376,19 +376,19 @@ export default function UploadProviderPage() {
   loading={isIngesting}
   disabled={!hasSelectedFiles || isIngesting}
 >
-  {!hasSelectedFiles ? (
-    <>Ingest files</>
-  ) : (
+  {hasSelectedFiles ? (
     <>
-      Ingest {selectedFiles.length} file
+      Ingest {selectedFiles.length} item
       {selectedFiles.length > 1 ? "s" : ""}
     </>
+  ) : (
+    <>Ingest selected items</>
   )}
 </Button>
 </TooltipTrigger>
 {!hasSelectedFiles ? (
   <TooltipContent side="left">
-    Select at least one file before ingesting
+    Select at least one item before ingesting
   </TooltipContent>
 ) : null}
 </Tooltip>
@@ -201,7 +201,7 @@ export function CloudConnectorsDialog({
 <DialogHeader>
   <DialogTitle>Cloud File Connectors</DialogTitle>
   <DialogDescription>
-    Select files from your connected cloud storage providers
+    Select files or folders from your connected cloud storage providers
   </DialogDescription>
 </DialogHeader>

@@ -232,7 +232,7 @@ export function CloudConnectorsDialog({
 !connector.hasAccessToken
   ? connector.accessTokenError ||
     "Access token required - try reconnecting your account"
-  : `Select files from ${connector.name}`
+  : `Select files or folders from ${connector.name}`
 }
 onClick={e => {
   e.preventDefault();

@@ -283,6 +283,7 @@ export function CloudConnectorsDialog({
   accessToken={connectorAccessTokens[connector.type]}
   onPickerStateChange={() => {}}
   clientId={connector.clientId}
+  isIngesting={false}
 />
 </div>
 );
@@ -26,7 +26,7 @@ export const FileList = ({
 return (
   <div className="space-y-2 relative">
     <div className="flex items-center justify-between">
-      <p className="text-sm font-medium">Added files ({files.length})</p>
+      <p className="text-sm font-medium">Selected items ({files.length})</p>
       <Button
         ignoreTitleCase={true}
         onClick={onClearAll}

@@ -39,7 +39,7 @@ export const PickerHeader = ({
 return (
   <div className="text-sm text-muted-foreground p-4 bg-muted/20 rounded-md">
     Please connect to {getProviderName(provider)} first to select specific
-    files.
+    files or folders.
   </div>
 );
 }

@@ -48,7 +48,7 @@ export const PickerHeader = ({
 <Card>
   <CardContent className="flex flex-col items-center text-center py-8">
     <p className="text-sm text-primary mb-4">
-      Select files from {getProviderName(provider)} to ingest.
+      Select files or folders from {getProviderName(provider)} to ingest.
     </p>
     <Button
       onClick={onAddFiles}

@@ -56,7 +56,7 @@ export const PickerHeader = ({
       className="bg-foreground text-background hover:bg-foreground/90 font-semibold"
     >
       <Plus className="h-4 w-4" />
-      {isPickerOpen ? "Opening picker..." : "Add files"}
+      {isPickerOpen ? "Opening picker..." : "Add files or folders"}
     </Button>
   </CardContent>
 </Card>
@@ -52,12 +52,16 @@ export class GoogleDriveHandler {
 try {
   this.onPickerStateChange?.(true);

+  // Create a view for regular documents
+  const docsView = new window.google.picker.DocsView()
+    .setIncludeFolders(true)
+    .setSelectFolderEnabled(true);
+
   const picker = new window.google.picker.PickerBuilder()
-    .addView(window.google.picker.ViewId.DOCS)
-    .addView(window.google.picker.ViewId.FOLDERS)
+    .addView(docsView)
     .setOAuthToken(this.accessToken)
     .enableFeature(window.google.picker.Feature.MULTISELECT_ENABLED)
-    .setTitle("Select files from Google Drive")
+    .setTitle("Select files or folders from Google Drive")
     .setCallback(data => this.pickerCallback(data, onFileSelected))
     .build();
@@ -53,6 +53,7 @@ declare global {
   load: (callback: () => void) => void;
 };
 PickerBuilder: new () => GooglePickerBuilder;
+DocsView: new () => GoogleDocsView;
 ViewId: {
   DOCS: string;
   FOLDERS: string;

@@ -83,8 +84,13 @@ declare global {
   }
 }

+export interface GoogleDocsView {
+  setIncludeFolders: (include: boolean) => GoogleDocsView;
+  setSelectFolderEnabled: (enabled: boolean) => GoogleDocsView;
+}
+
 export interface GooglePickerBuilder {
-  addView: (view: string) => GooglePickerBuilder;
+  addView: (view: GoogleDocsView | string) => GooglePickerBuilder;
   setOAuthToken: (token: string) => GooglePickerBuilder;
   setCallback: (
     callback: (data: GooglePickerData) => void
@@ -19,6 +19,7 @@ import {
 import { useAuth } from "@/contexts/auth-context";

 // Task interface is now imported from useGetTasksQuery
+export type { Task };

 export interface TaskFile {
   filename: string;
@@ -1,6 +1,10 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
 [project]
 name = "openrag"
-version = "0.1.14.dev3"
+version = "0.1.19"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"

@@ -31,6 +35,9 @@ dependencies = [
 "docling-serve>=1.4.1",
 ]

+[dependency-groups]
+dev = ["pytest>=8", "pytest-asyncio>=0.21.0", "pytest-mock>=3.12.0", "pytest-cov>=4.0.0"]
+
 [project.scripts]
 openrag = "tui.main:run_tui"
New file: scripts/docling_ctl.py (91 lines)

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""Helper script to control docling-serve using DoclingManager for CI/testing."""

import sys
import asyncio
import argparse
from pathlib import Path

# Add src to path so we can import DoclingManager
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from tui.managers.docling_manager import DoclingManager


async def start_docling(port: int = 5001, host: str = None, enable_ui: bool = False):
    """Start docling-serve."""
    manager = DoclingManager()

    if manager.is_running():
        print("Docling-serve is already running")
        status = manager.get_status()
        print(f"Endpoint: {status['endpoint']}")
        return 0

    host_msg = f"{host}:{port}" if host else f"auto-detected host:{port}"
    print(f"Starting docling-serve on {host_msg}...")
    success, message = await manager.start(port=port, host=host, enable_ui=enable_ui)

    if success:
        print(f"{message}")
        status = manager.get_status()
        print(f"Endpoint: {status['endpoint']}")
        print(f"PID: {status['pid']}")
        return 0
    else:
        print(f"{message}", file=sys.stderr)
        return 1


async def stop_docling():
    """Stop docling-serve."""
    manager = DoclingManager()

    if not manager.is_running():
        print("Docling-serve is not running")
        return 0

    print("Stopping docling-serve...")
    success, message = await manager.stop()

    if success:
        print(f"{message}")
        return 0
    else:
        print(f"{message}", file=sys.stderr)
        return 1


async def status_docling():
    """Get docling-serve status."""
    manager = DoclingManager()
    status = manager.get_status()

    print(f"Status: {status['status']}")
    if status['status'] == 'running':
        print(f"Endpoint: {status['endpoint']}")
        print(f"Docs: {status['docs_url']}")
        print(f"PID: {status['pid']}")

    return 0 if status['status'] == 'running' else 1


async def main():
    parser = argparse.ArgumentParser(description="Control docling-serve for CI/testing")
    parser.add_argument("command", choices=["start", "stop", "status"], help="Command to run")
    parser.add_argument("--port", type=int, default=5001, help="Port to run on (default: 5001)")
    parser.add_argument("--host", default=None, help="Host to bind to (default: auto-detect for containers)")
    parser.add_argument("--enable-ui", action="store_true", help="Enable UI")

    args = parser.parse_args()

    if args.command == "start":
        return await start_docling(port=args.port, host=args.host if args.host else None, enable_ui=args.enable_ui)
    elif args.command == "stop":
        return await stop_docling()
    elif args.command == "status":
        return await status_docling()


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
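The same lifecycle can be driven programmatically rather than through the CLI wrapper above. This is a minimal sketch that reuses only the DoclingManager calls the script itself makes (is_running, start, get_status, stop); the import assumes `src` is on `sys.path`, as the script arranges.

import asyncio

from tui.managers.docling_manager import DoclingManager


async def smoke_test() -> None:
    manager = DoclingManager()
    if not manager.is_running():
        ok, message = await manager.start(port=5001, enable_ui=False)
        print(ok, message)
    print(manager.get_status())  # includes 'status', 'endpoint', 'pid'
    await manager.stop()


asyncio.run(smoke_test())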
New file: src/api/docling.py (120 lines)

@@ -0,0 +1,120 @@
"""Docling service proxy endpoints."""

import socket
import struct
from pathlib import Path

import httpx
from starlette.requests import Request
from starlette.responses import JSONResponse

from utils.container_utils import (
    detect_container_environment,
    get_container_host,
    guess_host_ip_for_containers,
)
from utils.logging_config import get_logger

logger = get_logger(__name__)


def _get_gateway_ip_from_route() -> str | None:
    """Return the default gateway IP visible from the current network namespace."""
    try:
        with Path("/proc/net/route").open() as route_table:
            next(route_table)  # Skip header
            for line in route_table:
                fields = line.strip().split()
                min_fields = 3  # interface, destination, gateway
                if len(fields) >= min_fields and fields[1] == "00000000":
                    gateway_hex = fields[2]
                    gw_int = int(gateway_hex, 16)
                    gateway_ip = socket.inet_ntoa(struct.pack("<L", gw_int))
                    return gateway_ip
    except (FileNotFoundError, PermissionError, IndexError, ValueError) as err:
        logger.warning("Could not read routing table: %s", err)

    return None


def determine_docling_host() -> str:
    """Determine the host address used for docling health checks."""
    container_type = detect_container_environment()
    if container_type:
        # Try HOST_DOCKER_INTERNAL env var first
        container_host = get_container_host()
        if container_host:
            logger.info("Using container-aware host '%s'", container_host)
            return container_host

        # Try special hostnames (Docker Desktop and rootless podman)
        for hostname in ["host.docker.internal", "host.containers.internal"]:
            try:
                socket.getaddrinfo(hostname, None)
                logger.info("Using %s for container-to-host communication", hostname)
                return hostname
            except socket.gaierror:
                logger.debug("%s not available", hostname)

        # Try gateway IP detection (Docker on Linux)
        gateway_ip = _get_gateway_ip_from_route()
        if gateway_ip:
            logger.info("Detected host gateway IP: %s", gateway_ip)
            return gateway_ip

        # Fallback to bridge IP
        fallback_ip = guess_host_ip_for_containers(logger=logger)
        logger.info("Falling back to container bridge host %s", fallback_ip)
        return fallback_ip

    # Running outside a container
    logger.info("Running outside a container; using localhost")
    return "localhost"


# Detect the host IP once at startup
HOST_IP = determine_docling_host()
DOCLING_SERVICE_URL = f"http://{HOST_IP}:5001"


async def health(request: Request) -> JSONResponse:
    """
    Proxy health check to docling-serve.
    This allows the frontend to check docling status via same-origin request.
    """
    health_url = f"{DOCLING_SERVICE_URL}/health"
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(health_url, timeout=2.0)

            if response.status_code == 200:
                return JSONResponse({
                    "status": "healthy",
                    "host": HOST_IP,
                })
            else:
                logger.warning("Docling health check failed", url=health_url, status_code=response.status_code)
                return JSONResponse({
                    "status": "unhealthy",
                    "message": f"Health check failed with status: {response.status_code}",
                    "host": HOST_IP,
                }, status_code=503)

    except httpx.TimeoutException:
        logger.warning("Docling health check timeout", url=health_url)
        return JSONResponse({
            "status": "unhealthy",
            "message": "Connection timeout",
            "host": HOST_IP,
        }, status_code=503)
    except Exception as e:
        logger.error("Docling health check failed", url=health_url, error=str(e))
        return JSONResponse({
            "status": "unhealthy",
            "message": str(e),
            "host": HOST_IP,
        }, status_code=503)
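A quick way to exercise the proxy is a same-origin request from any HTTP client. The sketch below assumes the OpenRAG API listens on localhost:8000 and that the `health` handler above is mounted at `/docling/health`; both are assumptions, so adjust to your routing.

import httpx

# Hypothetical host/port and mount path; see the app's route registration.
resp = httpx.get("http://localhost:8000/docling/health", timeout=5.0)
print(resp.status_code, resp.json())  # e.g. 200 {"status": "healthy", "host": "..."}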
@@ -28,7 +28,6 @@ def require_auth(session_manager):
 async def wrapper(request: Request):
     # In no-auth mode, bypass authentication entirely
     if is_no_auth_mode():
-        logger.debug("No-auth mode: Creating anonymous user")
         # Create an anonymous user object so endpoints don't break
         from session_manager import User
         from datetime import datetime

@@ -36,7 +35,6 @@ def require_auth(session_manager):
         from session_manager import AnonymousUser
         request.state.user = AnonymousUser()
         request.state.jwt_token = None  # No JWT in no-auth mode
-        logger.debug("Set user_id=anonymous, jwt_token=None")
         return await handler(request)

     user = get_current_user(request, session_manager)
@@ -13,8 +13,8 @@ from utils.container_utils import get_container_host
 from utils.document_processing import create_document_converter
 from utils.logging_config import get_logger

-load_dotenv()
-load_dotenv("../")
+load_dotenv(override=False)
+load_dotenv("../", override=False)

 logger = get_logger(__name__)

@@ -61,12 +61,6 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
 def is_no_auth_mode():
     """Check if we're running in no-auth mode (OAuth credentials missing)"""
     result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET)
-    logger.debug(
-        "Checking auth mode",
-        no_auth_mode=result,
-        has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None,
-        has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None,
-    )
     return result
@ -1,21 +1,20 @@
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
|
||||||
import time
|
import time
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Dict, List, Any, Optional, Iterable, Set
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Iterable, List, Optional, Set
|
||||||
|
|
||||||
from googleapiclient.errors import HttpError
|
from googleapiclient.errors import HttpError
|
||||||
from googleapiclient.http import MediaIoBaseDownload
|
from googleapiclient.http import MediaIoBaseDownload
|
||||||
|
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
|
||||||
|
|
||||||
# Project-specific base types (adjust imports to your project)
|
|
||||||
from ..base import BaseConnector, ConnectorDocument, DocumentACL
|
from ..base import BaseConnector, ConnectorDocument, DocumentACL
|
||||||
from .oauth import GoogleDriveOAuth
|
from .oauth import GoogleDriveOAuth
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
# -------------------------
|
# -------------------------
|
||||||
# Config model
|
# Config model
|
||||||
|
|
@ -32,8 +31,8 @@ class GoogleDriveConfig:
|
||||||
recursive: bool = True
|
recursive: bool = True
|
||||||
|
|
||||||
# Shared Drives control
|
# Shared Drives control
|
||||||
drive_id: Optional[str] = None # when set, we use corpora='drive'
|
drive_id: Optional[str] = None # when set, we use corpora='drive'
|
||||||
corpora: Optional[str] = None # 'user' | 'drive' | 'domain'; auto-picked if None
|
corpora: Optional[str] = None # 'user' | 'drive' | 'domain'; auto-picked if None
|
||||||
|
|
||||||
# Optional filtering
|
# Optional filtering
|
||||||
include_mime_types: Optional[List[str]] = None
|
include_mime_types: Optional[List[str]] = None
|
||||||
|
|
@ -80,7 +79,6 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
_FILE_ID_ALIASES = ("file_ids", "selected_file_ids", "selected_files")
|
_FILE_ID_ALIASES = ("file_ids", "selected_file_ids", "selected_files")
|
||||||
_FOLDER_ID_ALIASES = ("folder_ids", "selected_folder_ids", "selected_folders")
|
_FOLDER_ID_ALIASES = ("folder_ids", "selected_folder_ids", "selected_folders")
|
||||||
|
|
||||||
|
|
||||||
def emit(self, doc: ConnectorDocument) -> None:
|
def emit(self, doc: ConnectorDocument) -> None:
|
||||||
"""
|
"""
|
||||||
Emit a ConnectorDocument instance.
|
Emit a ConnectorDocument instance.
|
||||||
|
|
@ -100,7 +98,9 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
|
|
||||||
# Token file default (so callback & workers don’t need to pass it)
|
# Token file default (so callback & workers don’t need to pass it)
|
||||||
project_root = Path(__file__).resolve().parent.parent.parent.parent
|
project_root = Path(__file__).resolve().parent.parent.parent.parent
|
||||||
token_file = config.get("token_file") or str(project_root / "google_drive_token.json")
|
token_file = config.get("token_file") or str(
|
||||||
|
project_root / "google_drive_token.json"
|
||||||
|
)
|
||||||
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
|
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if not isinstance(client_id, str) or not client_id.strip():
|
if not isinstance(client_id, str) or not client_id.strip():
|
||||||
|
|
@ -115,7 +115,9 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Normalize incoming IDs from any of the supported alias keys
|
# Normalize incoming IDs from any of the supported alias keys
|
||||||
def _first_present_list(cfg: Dict[str, Any], keys: Iterable[str]) -> Optional[List[str]]:
|
def _first_present_list(
|
||||||
|
cfg: Dict[str, Any], keys: Iterable[str]
|
||||||
|
) -> Optional[List[str]]:
|
||||||
for k in keys:
|
for k in keys:
|
||||||
v = cfg.get(k)
|
v = cfg.get(k)
|
||||||
if v: # accept non-empty list
|
if v: # accept non-empty list
|
||||||
|
|
@ -151,6 +153,7 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
|
|
||||||
# Drive client is built in authenticate()
|
# Drive client is built in authenticate()
|
||||||
from google.oauth2.credentials import Credentials
|
from google.oauth2.credentials import Credentials
|
||||||
|
|
||||||
self.creds: Optional[Credentials] = None
|
self.creds: Optional[Credentials] = None
|
||||||
self.service: Any = None
|
self.service: Any = None
|
||||||
|
|
||||||
|
|
@ -214,7 +217,7 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
"id, name, mimeType, modifiedTime, createdTime, size, "
|
"id, name, mimeType, modifiedTime, createdTime, size, "
|
||||||
"webViewLink, parents, owners, driveId"
|
"webViewLink, parents, owners, driveId"
|
||||||
),
|
),
|
||||||
**self._drives_flags,
|
**self._drives_get_flags,
|
||||||
)
|
)
|
||||||
.execute()
|
.execute()
|
||||||
)
|
)
|
||||||
|
|
@ -285,7 +288,9 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
Fetch metadata for a file by ID (resolving shortcuts).
|
Fetch metadata for a file by ID (resolving shortcuts).
|
||||||
"""
|
"""
|
||||||
if self.service is None:
|
if self.service is None:
|
||||||
raise RuntimeError("Google Drive service is not initialized. Please authenticate first.")
|
raise RuntimeError(
|
||||||
|
"Google Drive service is not initialized. Please authenticate first."
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
meta = (
|
meta = (
|
||||||
self.service.files()
|
self.service.files()
|
||||||
|
|
@ -323,24 +328,40 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
def _iter_selected_items(self) -> List[Dict[str, Any]]:
|
def _iter_selected_items(self) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Return a de-duplicated list of file metadata for the selected scope:
|
Return a de-duplicated list of file metadata for the selected scope:
|
||||||
- explicit file_ids
|
- explicit file_ids (automatically expands folders to their contents)
|
||||||
- items inside folder_ids (with optional recursion)
|
- items inside folder_ids (with optional recursion)
|
||||||
Shortcuts are resolved to their targets automatically.
|
Shortcuts are resolved to their targets automatically.
|
||||||
"""
|
"""
|
||||||
seen: Set[str] = set()
|
seen: Set[str] = set()
|
||||||
items: List[Dict[str, Any]] = []
|
items: List[Dict[str, Any]] = []
|
||||||
|
folders_to_expand: List[str] = []
|
||||||
|
|
||||||
# Explicit files
|
# Process file_ids: separate actual files from folders
|
||||||
if self.cfg.file_ids:
|
if self.cfg.file_ids:
|
||||||
for fid in self.cfg.file_ids:
|
for fid in self.cfg.file_ids:
|
||||||
meta = self._get_file_meta_by_id(fid)
|
meta = self._get_file_meta_by_id(fid)
|
||||||
if meta and meta["id"] not in seen:
|
if not meta:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If it's a folder, add to folders_to_expand instead
|
||||||
|
if meta.get("mimeType") == "application/vnd.google-apps.folder":
|
||||||
|
logger.debug(
|
||||||
|
f"Item {fid} ({meta.get('name')}) is a folder, "
|
||||||
|
f"will expand to contents"
|
||||||
|
)
|
||||||
|
folders_to_expand.append(fid)
|
||||||
|
elif meta["id"] not in seen:
|
||||||
|
# It's a regular file, add it directly
|
||||||
seen.add(meta["id"])
|
seen.add(meta["id"])
|
||||||
items.append(meta)
|
items.append(meta)
|
||||||
|
|
||||||
# Folders
|
# Collect all folders to expand (from both file_ids and folder_ids)
|
||||||
if self.cfg.folder_ids:
|
if self.cfg.folder_ids:
|
||||||
folder_children = self._bfs_expand_folders(self.cfg.folder_ids)
|
folders_to_expand.extend(self.cfg.folder_ids)
|
||||||
|
|
||||||
|
# Expand all folders to their contents
|
||||||
|
if folders_to_expand:
|
||||||
|
folder_children = self._bfs_expand_folders(folders_to_expand)
|
||||||
for meta in folder_children:
|
for meta in folder_children:
|
||||||
meta = self._resolve_shortcut(meta)
|
meta = self._resolve_shortcut(meta)
|
||||||
if meta.get("id") in seen:
|
if meta.get("id") in seen:
|
||||||
|
|
@ -357,7 +378,11 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
|
|
||||||
items = self._filter_by_mime(items)
|
items = self._filter_by_mime(items)
|
||||||
# Exclude folders from final emits:
|
# Exclude folders from final emits:
|
||||||
items = [m for m in items if m.get("mimeType") != "application/vnd.google-apps.folder"]
|
items = [
|
||||||
|
m
|
||||||
|
for m in items
|
||||||
|
if m.get("mimeType") != "application/vnd.google-apps.folder"
|
||||||
|
]
|
||||||
return items
|
return items
|
||||||
|
|
||||||
# -------------------------
|
# -------------------------
|
||||||
|
|
@@ -389,29 +414,85 @@ class GoogleDriveConnector(BaseConnector):
     def _download_file_bytes(self, file_meta: Dict[str, Any]) -> bytes:
         """
         Download bytes for a given file (exporting if Google-native).
+
+        Raises ValueError if the item is a folder (folders cannot be downloaded).
         """
         file_id = file_meta["id"]
+        file_name = file_meta.get("name", "unknown")
         mime_type = file_meta.get("mimeType") or ""

-        # Google-native: export
-        export_mime = self._pick_export_mime(mime_type)
-        if mime_type.startswith("application/vnd.google-apps."):
-            # default fallback if not overridden
-            #if not export_mime:
-            #    export_mime = "application/pdf"
-            export_mime = "application/pdf"
+        logger.debug(
+            f"Downloading file {file_id} ({file_name}) with mimetype: {mime_type}"
+        )
+
+        # Folders cannot be downloaded or exported - this should never be reached
+        # as folders are automatically expanded in _iter_selected_items()
+        if mime_type == "application/vnd.google-apps.folder":
+            raise ValueError(
+                f"Cannot download folder {file_id} ({file_name}). "
+                f"This is a bug - folders should be automatically expanded before download."
+            )
+
+        # According to https://stackoverflow.com/questions/65053558/google-drive-api-v3-files-export-method-throws-a-403-error-export-only-support
+        # export_media ONLY works for Google Docs Editors files (Docs, Sheets, Slides, Drawings)
+        # All other files (including other Google Apps types like Forms, Sites, Maps) must use get_media
+
+        # Define which Google Workspace files are exportable
+        exportable_types = {
+            "application/vnd.google-apps.document",  # Google Docs
+            "application/vnd.google-apps.spreadsheet",  # Google Sheets
+            "application/vnd.google-apps.presentation",  # Google Slides
+            "application/vnd.google-apps.drawing",  # Google Drawings
+        }
+
+        if mime_type in exportable_types:
+            # This is an exportable Google Workspace file - must use export_media
+            export_mime = self._pick_export_mime(mime_type)
+            if not export_mime:
+                # Default fallback for unsupported Google native types
+                export_mime = "application/pdf"
+
+            logger.debug(
+                f"Using export_media for {file_id} ({mime_type} -> {export_mime})"
+            )
             # NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
-            request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
+            request = self.service.files().export_media(
+                fileId=file_id, mimeType=export_mime
+            )
         else:
+            # This is a regular uploaded file (PDF, image, video, etc.) - use get_media
+            # Also handles non-exportable Google Apps files (Forms, Sites, Maps, etc.)
+            logger.debug(f"Using get_media for {file_id} ({mime_type})")
             # Binary download (get_media also doesn't accept the Drive flags)
             request = self.service.files().get_media(fileId=file_id)

+        # Download the file with error handling for misclassified Google Docs
         fh = io.BytesIO()
         downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
         done = False
-        while not done:
-            status, done = downloader.next_chunk()
-            # Optional: you can log progress via status.progress()
+        try:
+            while not done:
+                status, done = downloader.next_chunk()
+                # Optional: you can log progress via status.progress()
+        except HttpError as e:
+            # If download fails with "fileNotDownloadable", it's a Docs Editor file
+            # that wasn't properly detected. Retry with export_media.
+            if "fileNotDownloadable" in str(e) and mime_type not in exportable_types:
+                logger.warning(
+                    f"Download failed for {file_id} ({mime_type}) with fileNotDownloadable error. "
+                    f"Retrying with export_media (file might be a Google Doc)"
+                )
+                export_mime = "application/pdf"
+                request = self.service.files().export_media(
+                    fileId=file_id, mimeType=export_mime
+                )
+                fh = io.BytesIO()
+                downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
+                done = False
+                while not done:
+                    status, done = downloader.next_chunk()
+            else:
+                raise

         return fh.getvalue()
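Note: the export-vs-download split above reduces to a few lines. A minimal sketch, assuming an already-authenticated Drive v3 `service` handle and hard-coding the PDF export target (both assumptions, not part of the diff):

    # Minimal sketch of the export-vs-download decision for Drive files.
    # Assumes `service` came from googleapiclient.discovery.build("drive", "v3", ...).
    import io

    from googleapiclient.http import MediaIoBaseDownload

    EXPORTABLE = {
        "application/vnd.google-apps.document",
        "application/vnd.google-apps.spreadsheet",
        "application/vnd.google-apps.presentation",
        "application/vnd.google-apps.drawing",
    }

    def fetch_bytes(service, file_id: str, mime_type: str) -> bytes:
        if mime_type in EXPORTABLE:
            # Docs Editors files have no binary body; they must be exported.
            request = service.files().export_media(fileId=file_id, mimeType="application/pdf")
        else:
            # Regular uploads (and non-exportable Google Apps types) are fetched directly.
            request = service.files().get_media(fileId=file_id)
        buf = io.BytesIO()
        downloader = MediaIoBaseDownload(buf, request, chunksize=1024 * 1024)
        done = False
        while not done:
            _, done = downloader.next_chunk()
        return buf.getvalue()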
@@ -430,7 +511,9 @@ class GoogleDriveConnector(BaseConnector):

         # If still not authenticated, bail (caller should kick off OAuth init)
         if not await self.oauth.is_authenticated():
-            logger.debug("authenticate: no valid credentials; run OAuth init/callback first.")
+            logger.debug(
+                "authenticate: no valid credentials; run OAuth init/callback first."
+            )
             return False

         # Build Drive service from OAuth helper
@@ -450,7 +533,7 @@ class GoogleDriveConnector(BaseConnector):
         self,
         page_token: Optional[str] = None,
         max_files: Optional[int] = None,
-        **kwargs
+        **kwargs,
     ) -> Dict[str, Any]:
         """
         List files in the currently selected scope (file_ids/folder_ids/recursive).
@@ -487,11 +570,20 @@ class GoogleDriveConnector(BaseConnector):
     async def get_file_content(self, file_id: str) -> ConnectorDocument:
         """
         Fetch a file's metadata and content from Google Drive and wrap it in a ConnectorDocument.
+
+        Raises FileNotFoundError if the ID is a folder (folders cannot be downloaded).
         """
         meta = self._get_file_meta_by_id(file_id)
         if not meta:
             raise FileNotFoundError(f"Google Drive file not found: {file_id}")

+        # Check if this is a folder - folders cannot be downloaded
+        if meta.get("mimeType") == "application/vnd.google-apps.folder":
+            raise FileNotFoundError(
+                f"Cannot download folder {file_id} ({meta.get('name')}). "
+                f"Folders must be expanded to list their contents. "
+                f"This ID should not have been passed to get_file_content()."
+            )
+
         try:
             blob = self._download_file_bytes(meta)
         except Exception as e:
@@ -527,7 +619,9 @@ class GoogleDriveConnector(BaseConnector):
             metadata={
                 "parents": meta.get("parents"),
                 "driveId": meta.get("driveId"),
-                "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
+                "size": int(meta.get("size", 0))
+                if str(meta.get("size", "")).isdigit()
+                else None,
             },
         )
         return doc
@@ -546,10 +640,14 @@ class GoogleDriveConnector(BaseConnector):
         # 1) Ensure we are authenticated and have a live Drive service
         ok = await self.authenticate()
         if not ok:
-            raise RuntimeError("GoogleDriveConnector.setup_subscription: not authenticated")
+            raise RuntimeError(
+                "GoogleDriveConnector.setup_subscription: not authenticated"
+            )

         # 2) Resolve webhook address (no param in ABC, so pull from config/env)
-        webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv("GOOGLE_DRIVE_WEBHOOK_URL")
+        webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv(
+            "GOOGLE_DRIVE_WEBHOOK_URL"
+        )
         if not webhook_address:
             raise RuntimeError(
                 "GoogleDriveConnector.setup_subscription: webhook URL not configured. "
@@ -600,7 +698,9 @@ class GoogleDriveConnector(BaseConnector):
         }

         if not isinstance(channel_id, str) or not channel_id:
-            raise RuntimeError(f"Drive watch returned invalid channel id: {channel_id!r}")
+            raise RuntimeError(
+                f"Drive watch returned invalid channel id: {channel_id!r}"
+            )

         return channel_id
@@ -665,13 +765,20 @@ class GoogleDriveConnector(BaseConnector):
             return False

         try:
-            self.service.channels().stop(body={"id": subscription_id, "resourceId": resource_id}).execute()
+            self.service.channels().stop(
+                body={"id": subscription_id, "resourceId": resource_id}
+            ).execute()

             # 4) Clear local bookkeeping
-            if getattr(self, "_active_channel", None) and self._active_channel.get("channel_id") == subscription_id:
+            if (
+                getattr(self, "_active_channel", None)
+                and self._active_channel.get("channel_id") == subscription_id
+            ):
                 self._active_channel = {}

-            if hasattr(self, "_subscriptions") and isinstance(self._subscriptions, dict):
+            if hasattr(self, "_subscriptions") and isinstance(
+                self._subscriptions, dict
+            ):
                 self._subscriptions.pop(subscription_id, None)

             return True
@@ -722,7 +829,9 @@ class GoogleDriveConnector(BaseConnector):
         except Exception as e:
             selected_ids = set()
             try:
-                logger.error(f"handle_webhook: scope build failed, proceeding unfiltered: {e}")
+                logger.error(
+                    f"handle_webhook: scope build failed, proceeding unfiltered: {e}"
+                )
             except Exception:
                 pass
@@ -759,7 +868,11 @@ class GoogleDriveConnector(BaseConnector):
             # Filter to our selected scope if we have one; otherwise accept all
             if selected_ids and (rid not in selected_ids):
                 # Shortcut target might be in scope even if the shortcut isn't
-                tgt = fobj.get("shortcutDetails", {}).get("targetId") if fobj else None
+                tgt = (
+                    fobj.get("shortcutDetails", {}).get("targetId")
+                    if fobj
+                    else None
+                )
                 if not (tgt and tgt in selected_ids):
                     continue
@@ -808,7 +921,9 @@ class GoogleDriveConnector(BaseConnector):
                 blob = self._download_file_bytes(meta)
             except HttpError as e:
                 # Skip/record failures
-                logger.error(f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}")
+                logger.error(
+                    f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}"
+                )
                 continue

             from datetime import datetime
@@ -838,7 +953,9 @@ class GoogleDriveConnector(BaseConnector):
                     "webViewLink": meta.get("webViewLink"),
                     "parents": meta.get("parents"),
                     "driveId": meta.get("driveId"),
-                    "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
+                    "size": int(meta.get("size", 0))
+                    if str(meta.get("size", "")).isdigit()
+                    else None,
                 },
                 content=blob,
             )
@@ -849,7 +966,9 @@ class GoogleDriveConnector(BaseConnector):
     # -------------------------
     def get_start_page_token(self) -> str:
         # getStartPageToken accepts supportsAllDrives (not includeItemsFromAllDrives)
-        resp = self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
+        resp = (
+            self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
+        )
         return resp["startPageToken"]

     def poll_changes_and_sync(self) -> Optional[str]:
@@ -888,7 +1007,10 @@ class GoogleDriveConnector(BaseConnector):
             # Match scope
             if fid not in selected_ids:
                 # also consider shortcut target
-                if file_obj.get("mimeType") == "application/vnd.google-apps.shortcut":
+                if (
+                    file_obj.get("mimeType")
+                    == "application/vnd.google-apps.shortcut"
+                ):
                     tgt = file_obj.get("shortcutDetails", {}).get("targetId")
                     if tgt and tgt in selected_ids:
                         pass
@@ -923,7 +1045,10 @@ class GoogleDriveConnector(BaseConnector):
                 modified_time=parse_datetime(resolved.get("modifiedTime")),
                 mimetype=str(resolved.get("mimeType", "")),
                 acl=DocumentACL(),  # Set appropriate ACL if needed
-                metadata={"parents": resolved.get("parents"), "driveId": resolved.get("driveId")},
+                metadata={
+                    "parents": resolved.get("parents"),
+                    "driveId": resolved.get("driveId"),
+                },
                 content=blob,
             )
             self.emit(doc)
@@ -945,7 +1070,9 @@ class GoogleDriveConnector(BaseConnector):
     # -------------------------
     # Optional: webhook stubs
     # -------------------------
-    def build_watch_body(self, webhook_address: str, channel_id: Optional[str] = None) -> Dict[str, Any]:
+    def build_watch_body(
+        self, webhook_address: str, channel_id: Optional[str] = None
+    ) -> Dict[str, Any]:
         """
         Prepare the request body for changes.watch if you use webhooks.
         """
@@ -964,7 +1091,7 @@ class GoogleDriveConnector(BaseConnector):
         body = self.build_watch_body(webhook_address)
         result = (
             self.service.changes()
-            .watch(pageToken=page_token, body=body, **self._drives_flags)
+            .watch(pageToken=page_token, body=body, **self._drives_get_flags)
             .execute()
         )
         return result
@@ -974,7 +1101,9 @@ class GoogleDriveConnector(BaseConnector):
         Stop a previously started webhook watch.
         """
         try:
-            self.service.channels().stop(body={"id": channel_id, "resourceId": resource_id}).execute()
+            self.service.channels().stop(
+                body={"id": channel_id, "resourceId": resource_id}
+            ).execute()
             return True

         except HttpError as e:
@@ -1,5 +1,3 @@
-import os
-import tempfile
 from typing import Any, Dict, List, Optional

 # Create custom processor for connector files using Langflow
@@ -60,14 +58,14 @@ class LangflowConnectorService:
         # Create temporary file from document content
         with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
             # Write document content to temp file
-            with open(tmp_path, 'wb') as f:
+            with open(tmp_path, "wb") as f:
                 f.write(document.content)

             # Step 1: Upload file to Langflow
             logger.debug("Uploading file to Langflow", filename=document.filename)
             content = document.content
             file_tuple = (
-                document.filename.replace(" ", "_").replace("/", "_")+suffix,
+                document.filename.replace(" ", "_").replace("/", "_") + suffix,
                 content,
                 document.mimetype or "application/octet-stream",
             )
@@ -256,7 +254,10 @@ class LangflowConnectorService:
         file_ids: List[str],
         jwt_token: str = None,
     ) -> str:
-        """Sync specific files by their IDs using Langflow processing"""
+        """
+        Sync specific files by their IDs using Langflow processing.
+        Automatically expands folders to their contents.
+        """
         if not self.task_service:
             raise ValueError(
                 "TaskService not available - connector sync requires task service dependency"
@@ -279,10 +280,50 @@ class LangflowConnectorService:
         owner_name = user.name if user else None
         owner_email = user.email if user else None

+        # Temporarily set file_ids in the connector's config so list_files() can use them
+        # Store the original values to restore later
+        cfg = getattr(connector, "cfg", None)
+        original_file_ids = None
+        original_folder_ids = None
+
+        if cfg is not None:
+            original_file_ids = getattr(cfg, "file_ids", None)
+            original_folder_ids = getattr(cfg, "folder_ids", None)
+
+        try:
+            # Set the file_ids we want to sync in the connector's config
+            if cfg is not None:
+                cfg.file_ids = file_ids  # type: ignore
+                cfg.folder_ids = None  # type: ignore
+
+            # Get the expanded list of file IDs (folders will be expanded to their contents)
+            # This uses the connector's list_files() which calls _iter_selected_items()
+            result = await connector.list_files()
+            expanded_file_ids = [f["id"] for f in result.get("files", [])]
+
+            if not expanded_file_ids:
+                logger.warning(
+                    f"No files found after expanding file_ids. "
+                    f"Original IDs: {file_ids}. This may indicate all IDs were folders "
+                    f"with no contents, or files that were filtered out."
+                )
+                # Return empty task rather than failing
+                raise ValueError("No files to sync after expanding folders")
+
+        except Exception as e:
+            logger.error(f"Failed to expand file_ids via list_files(): {e}")
+            # Fallback to original file_ids if expansion fails
+            expanded_file_ids = file_ids
+        finally:
+            # Restore original config values
+            if cfg is not None:
+                cfg.file_ids = original_file_ids  # type: ignore
+                cfg.folder_ids = original_folder_ids  # type: ignore
+
         processor = LangflowConnectorFileProcessor(
             self,
             connection_id,
-            file_ids,
+            expanded_file_ids,
             user_id,
             jwt_token=jwt_token,
             owner_name=owner_name,
@@ -291,7 +332,7 @@ class LangflowConnectorService:

         # Create custom task using TaskService
         task_id = await self.task_service.create_custom_task(
-            user_id, file_ids, processor
+            user_id, expanded_file_ids, processor
        )

         return task_id
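Note: the swap-and-restore dance above generalizes into a context manager. A minimal sketch, where `cfg` stands in for the connector's config object and the attribute names mirror the diff (the wrapper itself is hypothetical, not part of the change):

    # Minimal sketch of the temporary config swap used for folder expansion:
    # narrow the selection, let list_files() expand folders, always restore.
    from contextlib import contextmanager

    @contextmanager
    def scoped_selection(cfg, file_ids):
        """Temporarily narrow cfg to file_ids, restoring the originals on exit."""
        saved = (cfg.file_ids, cfg.folder_ids)
        cfg.file_ids, cfg.folder_ids = file_ids, None
        try:
            yield cfg
        finally:
            # Restore even if listing raised, so the connector keeps its scope.
            cfg.file_ids, cfg.folder_ids = saved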
@@ -1,16 +1,11 @@
-import tempfile
-import os
-from typing import Dict, Any, List, Optional
+from typing import Any, Dict, List, Optional

-from .base import BaseConnector, ConnectorDocument
 from utils.logging_config import get_logger

-logger = get_logger(__name__)
-from .google_drive import GoogleDriveConnector
-from .sharepoint import SharePointConnector
-from .onedrive import OneDriveConnector
+from .base import BaseConnector, ConnectorDocument
 from .connection_manager import ConnectionManager

+logger = get_logger(__name__)
@@ -56,9 +51,11 @@ class ConnectorService:
         # Create temporary file from document content
         from utils.file_utils import auto_cleanup_tempfile

-        with auto_cleanup_tempfile(suffix=self._get_file_extension(document.mimetype)) as tmp_path:
+        with auto_cleanup_tempfile(
+            suffix=self._get_file_extension(document.mimetype)
+        ) as tmp_path:
             # Write document content to temp file
-            with open(tmp_path, 'wb') as f:
+            with open(tmp_path, "wb") as f:
                 f.write(document.content)

             # Use existing process_file_common function with connector document metadata
@@ -71,6 +68,7 @@ class ConnectorService:

             # Process using consolidated processing pipeline
             from models.processors import TaskProcessor
+
             processor = TaskProcessor(document_service=doc_service)
             result = await processor.process_document_standard(
                 file_path=tmp_path,
@@ -301,7 +299,10 @@ class ConnectorService:
         file_ids: List[str],
         jwt_token: str = None,
     ) -> str:
-        """Sync specific files by their IDs (used for webhook-triggered syncs)"""
+        """
+        Sync specific files by their IDs (used for webhook-triggered syncs or manual selection).
+        Automatically expands folders to their contents.
+        """
         if not self.task_service:
             raise ValueError(
                 "TaskService not available - connector sync requires task service dependency"
@@ -324,14 +325,53 @@ class ConnectorService:
         owner_name = user.name if user else None
         owner_email = user.email if user else None

+        # Temporarily set file_ids in the connector's config so list_files() can use them
+        # Store the original values to restore later
+        original_file_ids = None
+        original_folder_ids = None
+
+        if hasattr(connector, "cfg"):
+            original_file_ids = getattr(connector.cfg, "file_ids", None)
+            original_folder_ids = getattr(connector.cfg, "folder_ids", None)
+
+        try:
+            # Set the file_ids we want to sync in the connector's config
+            if hasattr(connector, "cfg"):
+                connector.cfg.file_ids = file_ids  # type: ignore
+                connector.cfg.folder_ids = None  # type: ignore
+
+            # Get the expanded list of file IDs (folders will be expanded to their contents)
+            # This uses the connector's list_files() which calls _iter_selected_items()
+            result = await connector.list_files()
+            expanded_file_ids = [f["id"] for f in result.get("files", [])]
+
+            if not expanded_file_ids:
+                logger.warning(
+                    f"No files found after expanding file_ids. "
+                    f"Original IDs: {file_ids}. This may indicate all IDs were folders "
+                    f"with no contents, or files that were filtered out."
+                )
+                # Return empty task rather than failing
+                raise ValueError("No files to sync after expanding folders")
+
+        except Exception as e:
+            logger.error(f"Failed to expand file_ids via list_files(): {e}")
+            # Fallback to original file_ids if expansion fails
+            expanded_file_ids = file_ids
+        finally:
+            # Restore original config values
+            if hasattr(connector, "cfg"):
+                connector.cfg.file_ids = original_file_ids  # type: ignore
+                connector.cfg.folder_ids = original_folder_ids  # type: ignore
+
         # Create custom processor for specific connector files
         from models.processors import ConnectorFileProcessor

-        # We'll pass file_ids as the files_info, the processor will handle ID-only files
+        # Use expanded_file_ids which has folders already expanded
         processor = ConnectorFileProcessor(
             self,
             connection_id,
-            file_ids,
+            expanded_file_ids,
             user_id,
             jwt_token=jwt_token,
             owner_name=owner_name,
@@ -340,7 +380,7 @@ class ConnectorService:

         # Create custom task using TaskService
         task_id = await self.task_service.create_custom_task(
-            user_id, file_ids, processor
+            user_id, expanded_file_ids, processor
         )

         return task_id
src/main.py (16 lines changed)
@@ -131,7 +131,7 @@ async def configure_alerting_security():
     # Don't fail startup if alerting config fails


-async def _ensure_opensearch_index(self):
+async def _ensure_opensearch_index():
     """Ensure OpenSearch index exists when using traditional connector service."""
     try:
         # Check if index already exists
@@ -242,6 +242,9 @@ def generate_jwt_keys():
             capture_output=True,
         )

+        # Set restrictive permissions on private key (readable by owner only)
+        os.chmod(private_key_path, 0o600)
+
         # Generate public key
         subprocess.run(
             [
@@ -257,12 +260,21 @@ def generate_jwt_keys():
             capture_output=True,
         )

+        # Set permissions on public key (readable by all)
+        os.chmod(public_key_path, 0o644)
+
         logger.info("Generated RSA keys for JWT signing")
     except subprocess.CalledProcessError as e:
         logger.error("Failed to generate RSA keys", error=str(e))
         raise
     else:
-        logger.info("RSA keys already exist, skipping generation")
+        # Ensure correct permissions on existing keys
+        try:
+            os.chmod(private_key_path, 0o600)
+            os.chmod(public_key_path, 0o644)
+            logger.info("RSA keys already exist, ensured correct permissions")
+        except OSError as e:
+            logger.warning("Failed to set permissions on existing keys", error=str(e))


 async def init_index_when_ready():
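Note: the permission discipline above is worth keeping in mind on its own. A minimal sketch, with hypothetical key paths standing in for the ones used by the service:

    # Minimal sketch: private keys readable only by the owner, public keys
    # world-readable. The paths are hypothetical placeholders.
    import os

    PRIVATE_KEY = "keys/jwt_private.pem"
    PUBLIC_KEY = "keys/jwt_public.pem"

    os.chmod(PRIVATE_KEY, 0o600)  # rw------- : owner-only read/write
    os.chmod(PUBLIC_KEY, 0o644)   # rw-r--r-- : anyone may read, only owner writes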
@@ -296,11 +296,16 @@ class AuthService:
         try:
             if self.langflow_mcp_service and isinstance(jwt_token, str) and jwt_token.strip():
                 global_vars = {"JWT": jwt_token}
+                global_vars["CONNECTOR_TYPE_URL"] = "url"
                 if user_info:
                     if user_info.get("id"):
                         global_vars["OWNER"] = user_info.get("id")
                     if user_info.get("name"):
-                        global_vars["OWNER_NAME"] = user_info.get("name")
+                        # OWNER_NAME may contain spaces, which can cause issues in headers.
+                        # Alternative: URL-encode the owner name to preserve spaces and special characters.
+                        owner_name = user_info.get("name")
+                        if owner_name:
+                            global_vars["OWNER_NAME"] = str(f"\"{owner_name}\"")
                     if user_info.get("email"):
                         global_vars["OWNER_EMAIL"] = user_info.get("email")
@@ -126,7 +126,11 @@ class DocumentService:
         from utils.file_utils import auto_cleanup_tempfile
         import os

-        with auto_cleanup_tempfile() as tmp_path:
+        # Preserve file extension for docling format detection
+        filename = upload_file.filename or "uploaded"
+        suffix = os.path.splitext(filename)[1] or ""
+
+        with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
             # Stream upload file to temporary file
             file_size = 0
             with open(tmp_path, 'wb') as tmp_file:
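Note: the fix above works because format sniffers often key off the file suffix. A minimal standalone sketch using only the standard library (the filename is a made-up example):

    # Minimal sketch of extension-preserving temp files, so downstream format
    # detection (e.g. docling) can key off the suffix.
    import os
    import tempfile

    filename = "report.pdf"
    suffix = os.path.splitext(filename)[1] or ""  # ".pdf"

    with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
        tmp.write(b"...file bytes...")
        tmp.flush()
        print(tmp.name)  # ends in .pdf, so extension-based type sniffing still works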
@@ -242,6 +242,35 @@ class ModelsService:
             headers["Authorization"] = f"Bearer {api_key}"
         if project_id:
             headers["Project-ID"] = project_id

+        # Validate credentials with a minimal completion request
+        async with httpx.AsyncClient() as client:
+            validation_url = f"{watson_endpoint}/ml/v1/text/generation"
+            validation_params = {"version": "2024-09-16"}
+            validation_payload = {
+                "input": "test",
+                "model_id": "ibm/granite-3-2b-instruct",
+                "project_id": project_id,
+                "parameters": {
+                    "max_new_tokens": 1,
+                },
+            }
+
+            validation_response = await client.post(
+                validation_url,
+                headers=headers,
+                params=validation_params,
+                json=validation_payload,
+                timeout=10.0,
+            )
+
+            if validation_response.status_code != 200:
+                raise Exception(
+                    f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
+                )
+
+            logger.info("IBM Watson credentials validated successfully")
+
         # Fetch foundation models using the correct endpoint
         models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"
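Note: the probe pattern generalizes beyond Watson: spend one token to fail fast on bad credentials before doing real work. A minimal sketch with a placeholder endpoint and headers (both assumptions):

    # Minimal credential probe: a 1-token generation request surfaces auth
    # errors cheaply. Endpoint, headers, and payload shape are placeholders.
    import asyncio
    import httpx

    async def probe(endpoint: str, headers: dict) -> bool:
        payload = {"input": "test", "parameters": {"max_new_tokens": 1}}
        async with httpx.AsyncClient() as client:
            resp = await client.post(endpoint, headers=headers, json=payload, timeout=10.0)
        return resp.status_code == 200

    # asyncio.run(probe("https://example.com/ml/v1/text/generation",
    #                   {"Authorization": "Bearer <token>"}))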
@@ -1 +1,8 @@
 """OpenRAG Terminal User Interface package."""
+
+from importlib.metadata import version
+
+try:
+    __version__ = version("openrag")
+except Exception:
+    __version__ = "unknown"
src/tui/_assets/docker-compose-cpu.yml (121-line file replaced by a 1-line symbolic link)
@@ -1,121 +0,0 @@
-services:
-  opensearch:
-    image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile
-    container_name: os
-    depends_on:
-      - openrag-backend
-    environment:
-      - discovery.type=single-node
-      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
-    # Run security setup in background after OpenSearch starts
-    command: >
-      bash -c "
-      # Start OpenSearch in background
-      /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
-
-      # Wait a bit for OpenSearch to start, then apply security config
-      sleep 10 && /usr/share/opensearch/setup-security.sh &
-
-      # Wait for background processes
-      wait
-      "
-    ports:
-      - "9200:9200"
-      - "9600:9600"
-
-  dashboards:
-    image: opensearchproject/opensearch-dashboards:3.0.0
-    container_name: osdash
-    depends_on:
-      - opensearch
-    environment:
-      OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
-      OPENSEARCH_USERNAME: "admin"
-      OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
-    ports:
-      - "5601:5601"
-
-  openrag-backend:
-    image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile.backend
-    container_name: openrag-backend
-    depends_on:
-      - langflow
-    environment:
-      - OPENSEARCH_HOST=opensearch
-      - LANGFLOW_URL=http://langflow:7860
-      - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
-      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
-      - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
-      - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
-      - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
-      - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
-      - OPENSEARCH_PORT=9200
-      - OPENSEARCH_USERNAME=admin
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-      - NVIDIA_VISIBLE_DEVICES=all
-      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
-      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
-      - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
-      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
-      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
-    volumes:
-      - ./documents:/app/documents:Z
-      - ./keys:/app/keys:Z
-      - ./flows:/app/flows:Z
-
-  openrag-frontend:
-    image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile.frontend
-    container_name: openrag-frontend
-    depends_on:
-      - openrag-backend
-    environment:
-      - OPENRAG_BACKEND_HOST=openrag-backend
-    ports:
-      - "3000:3000"
-
-  langflow:
-    volumes:
-      - ./flows:/app/flows:Z
-    image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
-    container_name: langflow
-    ports:
-      - "7860:7860"
-    environment:
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
-      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
-      - JWT=None
-      - OWNER=None
-      - OWNER_NAME=None
-      - OWNER_EMAIL=None
-      - CONNECTOR_TYPE=system
-      - OPENRAG-QUERY-FILTER="{}"
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - FILENAME=None
-      - MIMETYPE=None
-      - FILESIZE=0
-      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
-      - LANGFLOW_LOG_LEVEL=DEBUG
-      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
-      - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
-      # - DEFAULT_FOLDER_NAME=OpenRAG
-      - HIDE_GETTING_STARTED_PROGRESS=true
@@ -0,0 +1 @@
+../../../docker-compose-cpu.yml
src/tui/_assets/docker-compose.yml (121-line file replaced by a 1-line symbolic link)
@@ -1,121 +0,0 @@
-services:
-  opensearch:
-    image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile
-    container_name: os
-    depends_on:
-      - openrag-backend
-    environment:
-      - discovery.type=single-node
-      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
-    # Run security setup in background after OpenSearch starts
-    command: >
-      bash -c "
-      # Start OpenSearch in background
-      /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
-
-      # Wait a bit for OpenSearch to start, then apply security config
-      sleep 10 && /usr/share/opensearch/setup-security.sh &
-
-      # Wait for background processes
-      wait
-      "
-    ports:
-      - "9200:9200"
-      - "9600:9600"
-
-  dashboards:
-    image: opensearchproject/opensearch-dashboards:3.0.0
-    container_name: osdash
-    depends_on:
-      - opensearch
-    environment:
-      OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
-      OPENSEARCH_USERNAME: "admin"
-      OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
-    ports:
-      - "5601:5601"
-
-  openrag-backend:
-    image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile.backend
-    container_name: openrag-backend
-    depends_on:
-      - langflow
-    environment:
-      - OPENSEARCH_HOST=opensearch
-      - LANGFLOW_URL=http://langflow:7860
-      - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
-      - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
-      - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
-      - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
-      - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
-      - OPENSEARCH_PORT=9200
-      - OPENSEARCH_USERNAME=admin
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-      - NVIDIA_VISIBLE_DEVICES=all
-      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
-      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
-      - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
-      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
-      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
-    volumes:
-      - ./documents:/app/documents:Z
-      - ./keys:/app/keys:Z
-      - ./flows:/app/flows:Z
-    gpus: all
-
-  openrag-frontend:
-    image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile.frontend
-    container_name: openrag-frontend
-    depends_on:
-      - openrag-backend
-    environment:
-      - OPENRAG_BACKEND_HOST=openrag-backend
-    ports:
-      - "3000:3000"
-
-  langflow:
-    volumes:
-      - ./flows:/app/flows:Z
-    image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
-    container_name: langflow
-    ports:
-      - "7860:7860"
-    environment:
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
-      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
-      - JWT=None
-      - OWNER=None
-      - OWNER_NAME=None
-      - OWNER_EMAIL=None
-      - CONNECTOR_TYPE=system
-      - OPENRAG-QUERY-FILTER="{}"
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - FILENAME=None
-      - MIMETYPE=None
-      - FILESIZE=0
-      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
-      - LANGFLOW_LOG_LEVEL=DEBUG
-      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
-      - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
-      # - DEFAULT_FOLDER_NAME="OpenRAG"
-      - HIDE_GETTING_STARTED_PROGRESS=true
@@ -0,0 +1 @@
+../../../docker-compose.yml
src/tui/_assets/documents/2506.08231v1.pdf (binary file replaced by a symbolic link)
@@ -0,0 +1 @@
+../../../../documents/2506.08231v1.pdf

src/tui/_assets/documents/ai-human-resources.pdf (binary file replaced by a symbolic link)
@@ -0,0 +1 @@
+../../../../documents/ai-human-resources.pdf

src/tui/_assets/documents/warmup_ocr.pdf (binary file replaced by a symbolic link)
@@ -0,0 +1 @@
+../../../../documents/warmup_ocr.pdf

src/tui/_assets/flows/components/ollama_embedding.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_embedding.json

src/tui/_assets/flows/components/ollama_llm.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_llm.json

src/tui/_assets/flows/components/ollama_llm_text.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_llm_text.json

src/tui/_assets/flows/components/watsonx_embedding.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_embedding.json

src/tui/_assets/flows/components/watsonx_llm.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_llm.json

src/tui/_assets/flows/components/watsonx_llm_text.json (symbolic link)
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_llm_text.json

src/tui/_assets/flows/ingestion_flow.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/ingestion_flow.json

src/tui/_assets/flows/openrag_agent.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/openrag_agent.json

src/tui/_assets/flows/openrag_ingest_docling.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/openrag_ingest_docling.json

src/tui/_assets/flows/openrag_nudges.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/openrag_nudges.json

src/tui/_assets/flows/openrag_url_mcp.json (symbolic link)
@@ -0,0 +1 @@
+../../../../flows/openrag_url_mcp.json

src/tui/main.py (105 lines changed)
@@ -2,6 +2,7 @@

 import sys
 from pathlib import Path
+from typing import Iterable, Optional
 from textual.app import App, ComposeResult
 from utils.logging_config import get_logger
 try:
@@ -305,41 +306,103 @@ class OpenRAGTUI(App):
         return True, "Runtime requirements satisfied"


-def copy_sample_documents():
+def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[Iterable[str]] = None, *, force: bool = False) -> None:
+    """Copy packaged assets into destination and optionally overwrite existing files.
+
+    When ``force`` is True, files are refreshed if the packaged bytes differ.
+    """
+    destination.mkdir(parents=True, exist_ok=True)
+
+    for resource in resource_tree.iterdir():
+        target_path = destination / resource.name
+
+        if resource.is_dir():
+            _copy_assets(resource, target_path, allowed_suffixes, force=force)
+            continue
+
+        if allowed_suffixes and not any(resource.name.endswith(suffix) for suffix in allowed_suffixes):
+            continue
+        resource_bytes = resource.read_bytes()
+
+        if target_path.exists():
+            if not force:
+                continue
+
+            try:
+                if target_path.read_bytes() == resource_bytes:
+                    continue
+            except Exception as read_error:
+                logger.debug(f"Failed to read existing asset {target_path}: {read_error}")
+
+        target_path.write_bytes(resource_bytes)
+        logger.info(f"Copied bundled asset: {target_path}")
+
+
+def copy_sample_documents(*, force: bool = False) -> None:
     """Copy sample documents from package to current directory if they don't exist."""
     documents_dir = Path("documents")

-    # Check if documents directory already exists and has files
-    if documents_dir.exists() and any(documents_dir.glob("*.pdf")):
-        return  # Documents already exist, don't overwrite
-
     try:
-        # Get sample documents from package assets
         assets_files = files("tui._assets.documents")
-
-        # Create documents directory if it doesn't exist
-        documents_dir.mkdir(exist_ok=True)
-
-        # Copy each sample document
-        for resource in assets_files.iterdir():
-            if resource.is_file() and resource.name.endswith('.pdf'):
-                dest_path = documents_dir / resource.name
-                if not dest_path.exists():
-                    content = resource.read_bytes()
-                    dest_path.write_bytes(content)
-                    logger.info(f"Copied sample document: {resource.name}")
-
+        _copy_assets(assets_files, documents_dir, allowed_suffixes=(".pdf",), force=force)
     except Exception as e:
         logger.debug(f"Could not copy sample documents: {e}")
         # This is not a critical error - the app can work without sample documents


+def copy_sample_flows(*, force: bool = False) -> None:
+    """Copy sample flows from package to current directory if they don't exist."""
+    flows_dir = Path("flows")
+
+    try:
+        assets_files = files("tui._assets.flows")
+        _copy_assets(assets_files, flows_dir, allowed_suffixes=(".json",), force=force)
+    except Exception as e:
+        logger.debug(f"Could not copy sample flows: {e}")
+        # The app can proceed without bundled flows
+
+
+def copy_compose_files(*, force: bool = False) -> None:
+    """Copy docker-compose templates into the workspace if they are missing."""
+    try:
+        assets_root = files("tui._assets")
+    except Exception as e:
+        logger.debug(f"Could not access compose assets: {e}")
+        return
+
+    for filename in ("docker-compose.yml", "docker-compose-cpu.yml"):
+        destination = Path(filename)
+        if destination.exists() and not force:
+            continue
+
+        try:
+            resource = assets_root.joinpath(filename)
+            if not resource.is_file():
+                logger.debug(f"Compose template not found in assets: {filename}")
+                continue
+
+            resource_bytes = resource.read_bytes()
+            if destination.exists():
+                try:
+                    if destination.read_bytes() == resource_bytes:
+                        continue
+                except Exception as read_error:
+                    logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
+
+            destination.write_bytes(resource_bytes)
+            logger.info(f"Copied docker-compose template: {filename}")
+        except Exception as error:
+            logger.debug(f"Could not copy compose file {filename}: {error}")
+
+
 def run_tui():
     """Run the OpenRAG TUI application."""
     app = None
     try:
-        # Copy sample documents on first run
-        copy_sample_documents()
+        # Keep bundled assets aligned with the packaged versions
+        copy_sample_documents(force=True)
+        copy_sample_flows(force=True)
+        copy_compose_files(force=True)

         app = OpenRAGTUI()
         app.run()
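Note: the helpers above lean entirely on the importlib.resources Traversable API, which works whether the package is installed flat or zipped. A minimal sketch, reusing the same package name as above (any package containing data files would do):

    # Minimal sketch of walking packaged assets with importlib.resources.
    from importlib.resources import files

    root = files("tui._assets")
    for entry in root.iterdir():
        if entry.is_file():
            data = entry.read_bytes()  # Traversable API: works from wheels and zips alike
            print(entry.name, len(data))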
@@ -8,7 +8,6 @@ import threading
 import time
 from typing import Optional, Tuple, Dict, Any, List, AsyncIterator
 from utils.logging_config import get_logger
-from utils.container_utils import guess_host_ip_for_containers

 logger = get_logger(__name__)
@@ -32,7 +31,8 @@ class DoclingManager:

         self._process: Optional[subprocess.Popen] = None
         self._port = 5001
-        self._host = guess_host_ip_for_containers(logger=logger)  # Get appropriate host IP based on runtime
+        # Bind to all interfaces by default (can be overridden with DOCLING_BIND_HOST env var)
+        self._host = os.getenv('DOCLING_BIND_HOST', '0.0.0.0')
         self._running = False
         self._external_process = False
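Note: the convention introduced here is "bind wide, display narrow": listen on every interface so containers can reach the host service, but never advertise 0.0.0.0 in a URL. A minimal sketch of that mapping, with the port hard-coded as an example:

    # Minimal sketch of the bind-host convention: DOCLING_BIND_HOST overrides
    # the default all-interfaces bind; display URLs swap 0.0.0.0 for localhost.
    import os

    bind_host = os.getenv("DOCLING_BIND_HOST", "0.0.0.0")
    display_host = "localhost" if bind_host == "0.0.0.0" else bind_host
    print(f"binding on {bind_host}, advertise http://{display_host}:5001")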
@@ -150,16 +150,20 @@ class DoclingManager:
             else:
                 pid = self._load_pid()

+            # Use localhost for display URLs when bound to 0.0.0.0
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
+
             return {
                 "status": "running",
                 "port": self._port,
                 "host": self._host,
-                "endpoint": f"http://{self._host}:{self._port}",
-                "docs_url": f"http://{self._host}:{self._port}/docs",
-                "ui_url": f"http://{self._host}:{self._port}/ui",
+                "endpoint": f"http://{display_host}:{self._port}",
+                "docs_url": f"http://{display_host}:{self._port}/docs",
+                "ui_url": f"http://{display_host}:{self._port}/ui",
                 "pid": pid
             }
         else:
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
             return {
                 "status": "stopped",
                 "port": self._port,
|
||||||
return False, "Docling serve is already running"
|
return False, "Docling serve is already running"
|
||||||
|
|
||||||
self._port = port
|
self._port = port
|
||||||
# Use provided host or the bridge IP we detected in __init__
|
# Use provided host or keep default from __init__
|
||||||
if host is not None:
|
if host is not None:
|
||||||
self._host = host
|
self._host = host
|
||||||
# else: keep self._host as already set in __init__
|
|
||||||
|
|
||||||
# Check if port is already in use before trying to start
|
# Check if port is already in use before trying to start
|
||||||
import socket
|
import socket
|
||||||
|
|
@@ -293,7 +296,8 @@ class DoclingManager:
                 self._running = False
                 return False, f"Docling serve process exited immediately (code: {return_code})"

-            return True, f"Docling serve starting on http://{host}:{port}"
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
+            return True, f"Docling serve starting on http://{display_host}:{port}"

         except FileNotFoundError:
             return False, "docling-serve not available. Please install: uv add docling-serve"
@@ -454,7 +458,8 @@ class DoclingManager:
     async def follow_logs(self) -> AsyncIterator[str]:
         """Follow logs from the docling-serve process in real-time."""
         # First yield status message and any existing logs
-        status_msg = f"Docling serve is running on http://{self._host}:{self._port}"
+        display_host = "localhost" if self._host == "0.0.0.0" else self._host
+        status_msg = f"Docling serve is running on http://{display_host}:{self._port}"

         with self._log_lock:
             if self._log_buffer:
@@ -10,6 +10,7 @@ from rich.text import Text
 from rich.align import Align
 from dotenv import load_dotenv

+from .. import __version__
 from ..managers.container_manager import ContainerManager, ServiceStatus
 from ..managers.env_manager import EnvManager
 from ..managers.docling_manager import DoclingManager

@@ -116,7 +117,8 @@ class WelcomeScreen(Screen):
         ╚═════╝ ╚═╝ ╚══════╝╚═╝ ╚═══╝╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝
         """
         welcome_text.append(ascii_art, style="bold white")
-        welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim")
+        welcome_text.append("Terminal User Interface for OpenRAG\n", style="dim")
+        welcome_text.append(f"v{__version__}\n\n", style="dim cyan")

         # Check if all services are running
         all_services_running = self.services_running and self.docling_running
@@ -157,10 +157,22 @@ def guess_host_ip_for_containers(logger=None) -> str:
     import logging
     import re
     import shutil
+    import socket
     import subprocess

     log = logger or logging.getLogger(__name__)

+    def can_bind_to_address(ip_addr: str) -> bool:
+        """Test if we can bind to the given IP address."""
+        try:
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+                sock.bind((ip_addr, 0))  # Port 0 = let OS choose a free port
+            return True
+        except (OSError, socket.error) as e:
+            log.debug("Cannot bind to %s: %s", ip_addr, e)
+            return False
+
     def run(cmd, timeout=2, text=True):
         return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)

@@ -261,10 +273,23 @@ def guess_host_ip_for_containers(logger=None) -> str:
                 "Container-reachable host IP candidates: %s",
                 ", ".join(ordered_candidates),
             )
-        else:
-            log.info("Container-reachable host IP: %s", ordered_candidates[0])

-        return ordered_candidates[0]
+        # Try each candidate and return the first one we can bind to
+        for ip_addr in ordered_candidates:
+            if can_bind_to_address(ip_addr):
+                if len(ordered_candidates) > 1:
+                    log.info("Selected bindable host IP: %s", ip_addr)
+                else:
+                    log.info("Container-reachable host IP: %s", ip_addr)
+                return ip_addr
+            log.debug("Skipping %s (cannot bind)", ip_addr)
+
+        # None of the candidates were bindable, fall back to 127.0.0.1
+        log.warning(
+            "None of the discovered IPs (%s) can be bound; falling back to 127.0.0.1",
+            ", ".join(ordered_candidates),
+        )
+        return "127.0.0.1"

     log.warning(
         "No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container."
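The bind-probe pattern introduced here generalizes well; a self-contained sketch under the same assumptions (the helper name `first_bindable` and the candidate addresses are illustrative, not from the diff):

```python
import socket

def first_bindable(candidates: list[str], fallback: str = "127.0.0.1") -> str:
    """Return the first address we can actually bind a TCP socket to."""
    for ip in candidates:
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                sock.bind((ip, 0))  # port 0: the OS picks a free ephemeral port
            return ip
        except OSError:
            continue  # address exists in routing tables but is not ours to bind
    return fallback

# Example: typical Podman/Docker bridge candidates, in discovery order
print(first_bindable(["10.88.0.1", "172.17.0.1", "127.0.0.1"]))
```

Probing with an actual `bind()` rather than trusting interface discovery is what lets the fallback to `127.0.0.1` kick in on hosts where a bridge IP is visible but not locally bindable.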
1 tests/__init__.py Normal file

@@ -0,0 +1 @@
+# Test package
85 tests/conftest.py Normal file

@@ -0,0 +1,85 @@
+import asyncio
+import os
+import tempfile
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Force no-auth mode for testing by setting OAuth credentials to empty strings
+# This ensures anonymous JWT tokens are created automatically
+os.environ['GOOGLE_OAUTH_CLIENT_ID'] = ''
+os.environ['GOOGLE_OAUTH_CLIENT_SECRET'] = ''
+
+from src.config.settings import clients
+from src.session_manager import SessionManager
+from src.main import generate_jwt_keys
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Create an instance of the default event loop for the test session."""
+    loop = asyncio.get_event_loop_policy().new_event_loop()
+    yield loop
+    loop.close()
+
+
+@pytest_asyncio.fixture
+async def opensearch_client():
+    """OpenSearch client for testing - requires running OpenSearch."""
+    await clients.initialize()
+    yield clients.opensearch
+    # Cleanup test indices after tests
+    try:
+        await clients.opensearch.indices.delete(index="test_documents")
+    except Exception:
+        pass
+
+
+@pytest.fixture
+def session_manager():
+    """Session manager for testing."""
+    # Generate RSA keys before creating SessionManager
+    generate_jwt_keys()
+    sm = SessionManager("test-secret-key")
+    print(f"[DEBUG] SessionManager created with keys: private={sm.private_key_path}, public={sm.public_key_path}")
+    return sm
+
+
+@pytest.fixture
+def test_documents_dir():
+    """Create a temporary directory with test documents."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        test_dir = Path(temp_dir)
+
+        # Create some test files in supported formats
+        (test_dir / "test1.md").write_text("# Machine Learning Document\n\nThis is a test document about machine learning.")
+        (test_dir / "test2.md").write_text("# AI Document\n\nAnother document discussing artificial intelligence.")
+        (test_dir / "test3.md").write_text("# Data Science Document\n\nThis is a markdown file about data science.")
+
+        # Create subdirectory with files
+        sub_dir = test_dir / "subdir"
+        sub_dir.mkdir()
+        (sub_dir / "nested.md").write_text("# Neural Networks\n\nNested document about neural networks.")
+
+        yield test_dir
+
+
+@pytest.fixture
+def test_single_file():
+    """Create a single test file."""
+    with tempfile.NamedTemporaryFile(mode='w', suffix='_test_document.md', delete=False) as f:
+        f.write("# Single Test Document\n\nThis is a test document about OpenRAG testing framework. This document contains multiple sentences to ensure proper chunking. The content should be indexed and searchable in OpenSearch after processing.")
+        temp_path = f.name
+
+    yield temp_path
+
+    # Cleanup
+    try:
+        os.unlink(temp_path)
+    except FileNotFoundError:
+        pass
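These fixtures are plain pytest/pytest-asyncio plumbing, so they compose in the usual way. A hypothetical test (not part of the diff) showing how `test_documents_dir` and `session_manager` would be consumed, without needing a running OpenSearch cluster:

```python
# Illustrative only: exercises the conftest fixtures above.
def test_fixtures_compose(test_documents_dir, session_manager):
    # test_documents_dir yields a Path containing three markdown files plus
    # one nested file created by the fixture
    names = sorted(p.name for p in test_documents_dir.rglob("*.md"))
    assert names == ["nested.md", "test1.md", "test2.md", "test3.md"]
    # session_manager yields a SessionManager with freshly generated JWT keys
    assert callable(session_manager.create_jwt_token)
```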
1 tests/integration/__init__.py Normal file

@@ -0,0 +1 @@
+# Integration tests package
296 tests/integration/test_api_endpoints.py Normal file

@@ -0,0 +1,296 @@
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+import pytest
+
+
+async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
+    """Poll existing endpoints until the app and OpenSearch are ready.
+
+    Strategy:
+    - GET /auth/me should return 200 immediately (confirms app is up).
+    - POST /search with query "*" avoids embeddings and checks OpenSearch/index readiness.
+    """
+    # First test OpenSearch JWT directly
+    from src.session_manager import SessionManager, AnonymousUser
+    import os
+    import hashlib
+    import jwt as jwt_lib
+    sm = SessionManager("test")
+    test_token = sm.create_jwt_token(AnonymousUser())
+    token_hash = hashlib.sha256(test_token.encode()).hexdigest()[:16]
+    print(f"[DEBUG] Generated test JWT token hash: {token_hash}")
+    print(f"[DEBUG] Using key paths: private={sm.private_key_path}, public={sm.public_key_path}")
+    with open(sm.public_key_path, 'rb') as f:
+        pub_key_hash = hashlib.sha256(f.read()).hexdigest()[:16]
+    print(f"[DEBUG] Public key hash: {pub_key_hash}")
+    # Decode token to see claims
+    decoded = jwt_lib.decode(test_token, options={"verify_signature": False})
+    print(f"[DEBUG] JWT claims: iss={decoded.get('iss')}, sub={decoded.get('sub')}, aud={decoded.get('aud')}, roles={decoded.get('roles')}")
+
+    # Test OpenSearch JWT auth directly
+    opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}"
+    print(f"[DEBUG] Testing JWT auth directly against: {opensearch_url}/documents/_search")
+    async with httpx.AsyncClient(verify=False) as os_client:
+        r_os = await os_client.post(
+            f"{opensearch_url}/documents/_search",
+            headers={"Authorization": f"Bearer {test_token}"},
+            json={"query": {"match_all": {}}, "size": 0}
+        )
+        print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:500]}")
+        if r_os.status_code == 401:
+            print(f"[DEBUG] ❌ OpenSearch rejected JWT! OIDC config not working.")
+        else:
+            print(f"[DEBUG] ✓ OpenSearch accepted JWT!")
+
+    deadline = asyncio.get_event_loop().time() + timeout_s
+    last_err = None
+    while asyncio.get_event_loop().time() < deadline:
+        try:
+            r1 = await client.get("/auth/me")
+            print(f"[DEBUG] /auth/me status={r1.status_code}, body={r1.text[:200]}")
+            if r1.status_code in (401, 403):
+                raise AssertionError(f"/auth/me returned {r1.status_code}: {r1.text}")
+            if r1.status_code != 200:
+                await asyncio.sleep(0.5)
+                continue
+            # match_all readiness probe; no embeddings
+            r2 = await client.post("/search", json={"query": "*", "limit": 0})
+            print(f"[DEBUG] /search status={r2.status_code}, body={r2.text[:200]}")
+            if r2.status_code in (401, 403):
+                print(f"[DEBUG] Search failed with auth error. Response: {r2.text}")
+                raise AssertionError(f"/search returned {r2.status_code}: {r2.text}")
+            if r2.status_code == 200:
+                print("[DEBUG] Service ready!")
+                return
+            last_err = r2.text
+        except AssertionError:
+            raise
+        except Exception as e:
+            last_err = str(e)
+            print(f"[DEBUG] Exception during readiness check: {e}")
+        await asyncio.sleep(0.5)
+    raise AssertionError(f"Service not ready in time: {last_err}")
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_ingest: bool):
+    """Boot the ASGI app and exercise /upload and /search endpoints."""
+    # Ensure we route uploads to traditional processor and disable startup ingest
+    os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
+    os.environ["DISABLE_STARTUP_INGEST"] = "true"
+    # Force no-auth mode so endpoints bypass authentication
+    os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+    os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+    # Import after env vars to ensure settings pick them up. Clear cached modules
+    import sys
+    # Clear cached modules so settings pick up env and router sees new flag
+    for mod in [
+        "src.api.router",
+        "api.router",  # Also clear the non-src path
+        "src.api.connector_router",
+        "api.connector_router",
+        "src.config.settings",
+        "config.settings",
+        "src.auth_middleware",
+        "auth_middleware",
+        "src.main",
+        "api",  # Clear the api package itself
+        "src.api",
+        "services",  # Clear services that import clients
+        "src.services",
+        "services.search_service",
+        "src.services.search_service",
+    ]:
+        sys.modules.pop(mod, None)
+    from src.main import create_app, startup_tasks
+    import src.api.router as upload_router
+    from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
+
+    # Ensure a clean index before startup
+    await clients.initialize()
+    try:
+        await clients.opensearch.indices.delete(index=INDEX_NAME)
+        # Wait for deletion to complete
+        await asyncio.sleep(1)
+    except Exception:
+        pass
+
+    app = await create_app()
+    # Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan
+    await startup_tasks(app.state.services)
+
+    # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+    from src.main import _ensure_opensearch_index
+    await _ensure_opensearch_index()
+
+    # Verify index is truly empty after startup
+    try:
+        count_response = await clients.opensearch.count(index=INDEX_NAME)
+        doc_count = count_response.get('count', 0)
+        assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
+    except Exception as e:
+        # If count fails, the index might not exist yet, which is fine
+        pass
+
+    transport = httpx.ASGITransport(app=app)
+    try:
+        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+            # Wait for app + OpenSearch readiness using existing endpoints
+            await wait_for_service_ready(client)
+
+            # Create a temporary markdown file to upload
+            file_path = tmp_path / "endpoint_test_doc.md"
+            file_text = (
+                "# Single Test Document\n\n"
+                "This is a test document about OpenRAG testing framework. "
+                "The content should be indexed and searchable in OpenSearch after processing."
+            )
+            file_path.write_text(file_text)
+
+            # POST via router (multipart)
+            files = {
+                "file": (
+                    file_path.name,
+                    file_path.read_bytes(),
+                    "text/markdown",
+                )
+            }
+            upload_resp = await client.post("/upload", files=files)
+            body = upload_resp.json()
+            assert upload_resp.status_code == 201, upload_resp.text
+            assert body.get("status") in {"indexed", "unchanged"}
+            assert isinstance(body.get("id"), str)
+
+            # Poll search for the specific content until it's indexed
+            async def _wait_for_indexed(timeout_s: float = 30.0):
+                deadline = asyncio.get_event_loop().time() + timeout_s
+                while asyncio.get_event_loop().time() < deadline:
+                    resp = await client.post(
+                        "/search",
+                        json={"query": "OpenRAG testing framework", "limit": 5},
+                    )
+                    if resp.status_code == 200 and resp.json().get("results"):
+                        return resp
+                    await asyncio.sleep(0.5)
+                return resp
+
+            search_resp = await _wait_for_indexed()
+
+            # POST /search
+            assert search_resp.status_code == 200, search_resp.text
+            search_body = search_resp.json()
+
+            # Basic shape and at least one hit
+            assert isinstance(search_body.get("results"), list)
+            assert len(search_body["results"]) >= 0
+            # When hits exist, confirm our phrase is present in top result content
+            if search_body["results"]:
+                top = search_body["results"][0]
+                assert "text" in top or "content" in top
+                text = top.get("text") or top.get("content")
+                assert isinstance(text, str)
+                assert "testing" in text.lower()
+    finally:
+        # Explicitly close global clients to avoid aiohttp warnings
+        from src.config.settings import clients
+        try:
+            await clients.close()
+        except Exception:
+            pass
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow_ingest: bool):
+    """Exercise the router endpoint to ensure it routes to traditional upload when Langflow ingest is disabled."""
+    os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
+    os.environ["DISABLE_STARTUP_INGEST"] = "true"
+    os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+    os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+    import sys
+    for mod in [
+        "src.api.router",
+        "api.router",  # Also clear the non-src path
+        "src.api.connector_router",
+        "api.connector_router",
+        "src.config.settings",
+        "config.settings",
+        "src.auth_middleware",
+        "auth_middleware",
+        "src.main",
+        "api",  # Clear the api package itself
+        "src.api",
+        "services",  # Clear services that import clients
+        "src.services",
+        "services.search_service",
+        "src.services.search_service",
+    ]:
+        sys.modules.pop(mod, None)
+    from src.main import create_app, startup_tasks
+    import src.api.router as upload_router
+    from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
+
+    # Ensure a clean index before startup
+    await clients.initialize()
+    try:
+        await clients.opensearch.indices.delete(index=INDEX_NAME)
+        # Wait for deletion to complete
+        await asyncio.sleep(1)
+    except Exception:
+        pass
+
+    app = await create_app()
+    await startup_tasks(app.state.services)
+
+    # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+    from src.main import _ensure_opensearch_index
+    await _ensure_opensearch_index()
+
+    # Verify index is truly empty after startup
+    try:
+        count_response = await clients.opensearch.count(index=INDEX_NAME)
+        doc_count = count_response.get('count', 0)
+        assert doc_count == 0, f"Index should be empty after startup but contains {doc_count} documents"
+    except Exception as e:
+        # If count fails, the index might not exist yet, which is fine
+        pass
+    transport = httpx.ASGITransport(app=app)
+    try:
+        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+            await wait_for_service_ready(client)
+
+            file_path = tmp_path / "router_test_doc.md"
+            file_path.write_text("# Router Test\n\nThis file validates the upload router.")
+
+            files = {
+                "file": (
+                    file_path.name,
+                    file_path.read_bytes(),
+                    "text/markdown",
+                )
+            }
+
+            resp = await client.post("/router/upload_ingest", files=files)
+            data = resp.json()
+
+            print(f"data: {data}")
+            if disable_langflow_ingest:
+                assert resp.status_code == 201 or resp.status_code == 202, resp.text
+                assert data.get("status") in {"indexed", "unchanged"}
+                assert isinstance(data.get("id"), str)
+            else:
+                assert resp.status_code == 201 or resp.status_code == 202, resp.text
+                assert isinstance(data.get("task_id"), str)
+                assert data.get("file_count") == 1
+    finally:
+        from src.config.settings import clients
+        try:
+            await clients.close()
+        except Exception:
+            pass
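One detail worth calling out in these tests is the `sys.modules.pop(...)` loop: module-level settings are evaluated once at import time, so flipping `os.environ` alone does nothing for modules that were already imported. A generic sketch of the pattern (the module and flag names here are hypothetical, not from the diff):

```python
# Sketch: force a settings module to re-read the environment.
import importlib
import os
import sys

os.environ["FEATURE_FLAG"] = "true"       # hypothetical flag name
sys.modules.pop("myapp.settings", None)   # hypothetical module name; drop cached copy
settings = importlib.import_module("myapp.settings")  # re-executes module top level
```

Popping the whole dependency chain (routers, middleware, services), as the tests do, matters because any module holding a stale reference to the old settings object would otherwise keep seeing the old flag values.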
118 tests/integration/test_startup_ingest.py Normal file

@@ -0,0 +1,118 @@
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+import pytest
+
+
+async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
+    deadline = asyncio.get_event_loop().time() + timeout_s
+    last_err = None
+    while asyncio.get_event_loop().time() < deadline:
+        try:
+            r1 = await client.get("/auth/me")
+            if r1.status_code != 200:
+                await asyncio.sleep(0.5)
+                continue
+            r2 = await client.post("/search", json={"query": "*", "limit": 0})
+            if r2.status_code == 200:
+                return
+            last_err = r2.text
+        except Exception as e:
+            last_err = str(e)
+        await asyncio.sleep(0.5)
+    raise AssertionError(f"Service not ready in time: {last_err}")
+
+
+def count_files_in_documents() -> int:
+    base_dir = Path(os.getcwd()) / "documents"
+    if not base_dir.is_dir():
+        return 0
+    return sum(1 for _ in base_dir.rglob("*") if _.is_file())
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_startup_ingest_creates_task(disable_langflow_ingest: bool):
+    # Ensure startup ingest runs and choose pipeline per param
+    os.environ["DISABLE_STARTUP_INGEST"] = "false"
+    os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = (
+        "true" if disable_langflow_ingest else "false"
+    )
+    # Force no-auth mode for simpler endpoint access
+    os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+    os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+    # Reload settings to pick up env for this test run
+    import sys
+
+    for mod in [
+        "src.api.router",
+        "src.api.connector_router",
+        "src.config.settings",
+        "src.auth_middleware",
+        "src.main",
+    ]:
+        sys.modules.pop(mod, None)
+
+    from src.main import create_app, startup_tasks
+    from src.config.settings import clients, INDEX_NAME
+
+    # Ensure a clean index before startup
+    await clients.initialize()
+    try:
+        await clients.opensearch.indices.delete(index=INDEX_NAME)
+    except Exception:
+        pass
+
+    app = await create_app()
+    # Trigger startup tasks explicitly
+    await startup_tasks(app.state.services)
+
+    # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+    from src.main import _ensure_opensearch_index
+    await _ensure_opensearch_index()
+
+    transport = httpx.ASGITransport(app=app)
+    try:
+        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+            await wait_for_ready(client)
+
+            expected_files = count_files_in_documents()
+
+            # Poll /tasks until we see at least one startup ingest task
+            async def _wait_for_task(timeout_s: float = 60.0):
+                deadline = asyncio.get_event_loop().time() + timeout_s
+                last = None
+                while asyncio.get_event_loop().time() < deadline:
+                    resp = await client.get("/tasks")
+                    if resp.status_code == 200:
+                        data = resp.json()
+                        last = data
+                        tasks = data.get("tasks") if isinstance(data, dict) else None
+                        if isinstance(tasks, list) and len(tasks) > 0:
+                            return tasks
+                    await asyncio.sleep(0.5)
+                return last.get("tasks") if isinstance(last, dict) else last
+
+            tasks = await _wait_for_task()
+            if expected_files == 0:
+                return  # Nothing to do
+            if not (isinstance(tasks, list) and len(tasks) > 0):
+                # Fallback: verify that documents were indexed as a sign of startup ingest
+                sr = await client.post("/search", json={"query": "*", "limit": 1})
+                assert sr.status_code == 200, sr.text
+                total = sr.json().get("total")
+                assert isinstance(total, int) and total >= 0, "Startup ingest did not index documents"
+                return
+            newest = tasks[0]
+            assert "task_id" in newest
+            assert newest.get("total_files") == expected_files
+    finally:
+        # Explicitly close global clients to avoid aiohttp warnings
+        from src.config.settings import clients
+        try:
+            await clients.close()
+        except Exception:
+            pass
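Assuming OpenSearch and the other services are up, these integration tests can be driven programmatically as well as from the command line; a minimal sketch using pytest's public entry point (the path assumes the repository root as the working directory):

```python
# Equivalent to running `pytest tests/integration -v` from the repo root.
import pytest

if __name__ == "__main__":
    raise SystemExit(pytest.main(["tests/integration", "-v"]))
```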
162
uv.lock
generated
162
uv.lock
generated
|
|
@ -2,10 +2,10 @@ version = 1
|
||||||
revision = 2
|
revision = 2
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"sys_platform == 'darwin'",
|
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
|
"sys_platform == 'darwin'",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -291,8 +291,8 @@ name = "click"
|
||||||
version = "8.2.1"
|
version = "8.2.1"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|
@ -312,6 +312,67 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "coverage"
|
||||||
|
version = "7.10.7"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cramjam"
|
name = "cramjam"
|
||||||
version = "2.11.0"
|
version = "2.11.0"
|
||||||
|
|
@ -454,8 +515,8 @@ name = "dill"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
|
||||||
|
|
@ -619,8 +680,8 @@ name = "docling-mcp"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|
@ -943,8 +1004,8 @@ name = "fsspec"
|
||||||
version = "2025.5.1"
|
version = "2025.5.1"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" }
|
||||||
|
|
@ -1264,8 +1325,8 @@ name = "huggingface-hub"
|
||||||
version = "0.33.2"
|
version = "0.33.2"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|
@ -1339,6 +1400,15 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
|
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iniconfig"
|
||||||
|
version = "2.1.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jinja2"
|
name = "jinja2"
|
||||||
version = "3.1.6"
|
version = "3.1.6"
|
||||||
|
|
@ -1960,8 +2030,8 @@ name = "multiprocess"
|
||||||
version = "0.70.18"
|
version = "0.70.18"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
|
||||||
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
"platform_machine == 'x86_64' and sys_platform == 'linux'",
|
||||||
|
"platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||||
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|
@ -2282,7 +2352,7 @@ wheels = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openrag"
|
name = "openrag"
|
||||||
version = "0.1.14.dev3"
|
version = "0.1.19"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "agentd" },
|
{ name = "agentd" },
|
||||||
|
|
@ -2312,6 +2382,14 @@ dependencies = [
|
||||||
{ name = "uvicorn" },
|
{ name = "uvicorn" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[package.dev-dependencies]
|
||||||
|
dev = [
|
||||||
|
{ name = "pytest" },
|
||||||
|
{ name = "pytest-asyncio" },
|
||||||
|
{ name = "pytest-cov" },
|
||||||
|
{ name = "pytest-mock" },
|
||||||
|
]
|
||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "agentd", specifier = ">=0.2.2" },
|
{ name = "agentd", specifier = ">=0.2.2" },
|
||||||
|
|
@ -2341,6 +2419,14 @@ requires-dist = [
|
||||||
{ name = "uvicorn", specifier = ">=0.35.0" },
|
{ name = "uvicorn", specifier = ">=0.35.0" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[package.metadata.requires-dev]
|
||||||
|
dev = [
|
||||||
|
{ name = "pytest", specifier = ">=8" },
|
||||||
|
{ name = "pytest-asyncio", specifier = ">=0.21.0" },
|
||||||
|
{ name = "pytest-cov", specifier = ">=4.0.0" },
|
||||||
|
{ name = "pytest-mock", specifier = ">=3.12.0" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "opensearch-py"
|
name = "opensearch-py"
|
||||||
version = "3.0.0"
|
version = "3.0.0"
|
||||||
|
|
@@ -2836,6 +2922,60 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810, upload-time = "2024-12-19T19:28:09.857Z" },
 ]
 
+[[package]]
+name = "pytest"
+version = "8.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "iniconfig" },
+    { name = "packaging" },
+    { name = "pluggy" },
+    { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
+]
+
+[[package]]
+name = "pytest-asyncio"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
+]
+
+[[package]]
+name = "pytest-cov"
+version = "7.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "coverage" },
+    { name = "pluggy" },
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
+]
+
+[[package]]
+name = "pytest-mock"
+version = "3.15.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
+]
+
 [[package]]
 name = "python-bidi"
 version = "0.6.6"
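
Since pytest-asyncio and pytest-cov are now locked, projects typically pair them with a pytest configuration block. A hypothetical pyproject.toml fragment — these settings are illustrative and not part of this diff:

[tool.pytest.ini_options]
asyncio_mode = "auto"
addopts = "--cov --cov-report=term-missing"

Here asyncio_mode = "auto" lets pytest-asyncio collect async test functions without per-test markers, and the addopts line makes pytest-cov report uncovered lines on every run.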
@@ -3622,9 +3762,9 @@ name = "torch"
 version = "2.8.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "sys_platform == 'darwin'",
     "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+    "sys_platform == 'darwin'",
 ]
 dependencies = [
     { name = "filelock", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },
@@ -3669,9 +3809,9 @@ name = "torchvision"
 version = "0.23.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "sys_platform == 'darwin'",
     "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+    "sys_platform == 'darwin'",
 ]
 dependencies = [
     { name = "numpy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },
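
Note that the torch and torchvision hunks above change no versions or dependencies: the only difference is the position of the "sys_platform == 'darwin'" entry within resolution-markers, which is consistent with uv re-sorting the marker list when the lockfile is regenerated rather than an actual dependency change.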