diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index ca39b6e3..00000000
Binary files a/.DS_Store and /dev/null differ
diff --git a/.env.example b/.env.example
index 8d412670..f790ce09 100644
--- a/.env.example
+++ b/.env.example
@@ -37,6 +37,9 @@ AWS_SECRET_ACCESS_KEY=
 # OPTIONAL url for openrag link to langflow in the UI
 LANGFLOW_PUBLIC_URL=
 
+# OPTIONAL: Override host for docling service (for special networking setups)
+# HOST_DOCKER_INTERNAL=host.containers.internal
+
 # Langflow auth
 LANGFLOW_AUTO_LOGIN=False
 LANGFLOW_SUPERUSER=
diff --git a/.github/workflows/build-langflow-responses.yml b/.github/workflows/build-langflow-responses.yml
deleted file mode 100644
index 0f9d3d08..00000000
--- a/.github/workflows/build-langflow-responses.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-name: Build Langflow Responses Multi-Arch
-
-on:
-  workflow_dispatch:
-
-jobs:
-  build:
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - platform: linux/amd64
-            arch: amd64
-            runs-on: ubuntu-latest
-          - platform: linux/arm64
-            arch: arm64
-            runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-2]
-
-    runs-on: ${{ matrix.runs-on }}
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
-
-      - name: Build and push langflow (${{ matrix.arch }})
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./Dockerfile.langflow
-          platforms: ${{ matrix.platform }}
-          push: true
-          tags: phact/langflow:responses-${{ matrix.arch }}
-          cache-from: type=gha,scope=langflow-responses-${{ matrix.arch }}
-          cache-to: type=gha,mode=max,scope=langflow-responses-${{ matrix.arch }}
-
-  manifest:
-    needs: build
-    runs-on: ubuntu-latest
-    steps:
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
-
-      - name: Create and push multi-arch manifest
-        run: |
-          docker buildx imagetools create -t phact/langflow:responses \
-            phact/langflow:responses-amd64 \
-            phact/langflow:responses-arm64
\ No newline at end of file
diff --git a/.github/workflows/build-multiarch.yml b/.github/workflows/build-multiarch.yml
index 620bcf3b..64c13b91 100644
--- a/.github/workflows/build-multiarch.yml
+++ b/.github/workflows/build-multiarch.yml
@@ -1,16 +1,95 @@
-name: Build Multi-Architecture Docker Images
+name: Release + Docker Images (multi-arch)
 
 on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'pyproject.toml'
   workflow_dispatch:
-    inputs:
-      update_latest:
-        description: 'Update latest tags (production release)'
-        required: false
-        default: false
-        type: boolean
 
 jobs:
+  build-python-packages:
+    runs-on: ubuntu-latest
+    outputs:
+      skip_release: ${{ steps.version.outputs.skip_release }}
+      version: ${{ steps.version.outputs.version }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+
+      - name: Extract version from pyproject.toml
+        id: version
+        run: |
+          VERSION=$(grep -m1 '^version = ' pyproject.toml | cut -d '"' -f 2)
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "Version: $VERSION"
+
+          # Ask the remote for the tag: the default shallow checkout fetches no tags locally
+          if git ls-remote --exit-code --tags origin "refs/tags/v$VERSION" >/dev/null 2>&1; then
+            echo "Tag v$VERSION already exists, skipping release"
+            echo "skip_release=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          echo "skip_release=false" >> "$GITHUB_OUTPUT"
+
+          # Digits, dots and dashes only (e.g., 0.1.16) => production; anything else (e.g., 0.1.16-rc1) => prerelease
+          if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
+            echo "is_prerelease=false" >> "$GITHUB_OUTPUT"
+            echo "Release type: Production"
+          else
+            echo "is_prerelease=true" >> "$GITHUB_OUTPUT"
+            echo "Release type: Prerelease"
+          fi
+
+      - name: Build wheel and source distribution
+        if: steps.version.outputs.skip_release != 'true'
+        run: |
+          uv build
+
+      - name: List built artifacts
+        if: steps.version.outputs.skip_release != 'true'
+        run: |
+          ls -la dist/
+          echo "Built artifacts:"
+          for file in dist/*; do
+            echo "  - $(basename "$file") ($(stat -c%s "$file" | numfmt --to=iec-i)B)"
+          done
+
+      - name: Upload build artifacts
+        if: steps.version.outputs.skip_release != 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-packages
+          path: dist/
+          retention-days: 30
+
+      - name: Create Release
+        if: steps.version.outputs.skip_release != 'true'
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: v${{ steps.version.outputs.version }}
+          name: Release ${{ steps.version.outputs.version }}
+          draft: false
+          prerelease: ${{ steps.version.outputs.is_prerelease }}
+          generate_release_notes: true
+          files: |
+            dist/*.whl
+            dist/*.tar.gz
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
   build:
+    needs: build-python-packages
+    if: needs.build-python-packages.outputs.skip_release != 'true'
     strategy:
       fail-fast: false
       matrix:
@@ -106,9 +185,9 @@ jobs:
           cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.arch }}
 
   manifest:
-    needs: build
+    needs: [build, build-python-packages]
     runs-on: ubuntu-latest
-    if: github.event_name != 'pull_request'
+    if: github.event_name != 'pull_request' && needs.build-python-packages.outputs.skip_release != 'true'
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -146,8 +225,8 @@ jobs:
             phact/openrag-opensearch:$VERSION-amd64 \
             phact/openrag-opensearch:$VERSION-arm64
 
-          # Only update latest tags if version is numeric AND checkbox is checked
-          if [[ "$VERSION" =~ ^[0-9.-]+$ ]] && [[ "${{ github.event.inputs.update_latest }}" == "true" ]]; then
+          # Only update latest tags if version is numeric
+          if [[ "$VERSION" =~ ^[0-9.-]+$ ]]; then
             echo "Updating latest tags for production release: $VERSION"
             docker buildx imagetools create -t phact/openrag-backend:latest \
               phact/openrag-backend:$VERSION-amd64 \
@@ -165,5 +244,5 @@ jobs:
               phact/openrag-opensearch:$VERSION-amd64 \
               phact/openrag-opensearch:$VERSION-arm64
           else
-            echo "Skipping latest tags - version: $VERSION, update_latest: ${{ github.event.inputs.update_latest }}"
+            echo "Skipping latest tags - version: $VERSION (not numeric)"
           fi
diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml
new file mode 100644
index 00000000..16f33c41
--- /dev/null
+++ b/.github/workflows/test-integration.yml
@@ -0,0 +1,54 @@
+name: Integration Tests
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  tests:
+    runs-on: [self-hosted, linux, ARM64, langflow-ai-arm64-40gb]
+    env:
+      # Resolution order: repository/environment variable, then secret, then a built-in default
+      OPENSEARCH_PASSWORD: ${{ vars.OPENSEARCH_PASSWORD || secrets.OPENSEARCH_PASSWORD || 'OpenRag#2025!' }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+    steps:
+      - run: df -h
+      # Disabled runner disk cleanup step, kept for reference:
+      # run: |
+      #   sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
+      #   sudo docker image prune --all --force
+      #   sudo docker builder prune -a
+      - run: df -h
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up UV
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: 'latest'
+
+      - name: Python version
+        run: uv python install 3.13
+
+      - name: Install dependencies
+        run: uv sync
+
+      - name: Run integration tests
+        env:
+          OPENSEARCH_HOST: 'localhost'
+          OPENSEARCH_PORT: '9200'
+          OPENSEARCH_USERNAME: 'admin'
+          OPENSEARCH_PASSWORD: ${{ env.OPENSEARCH_PASSWORD }}
+          LOG_LEVEL: 'DEBUG'
+          # Empty OAuth credentials force no-auth mode so tests bypass OAuth
+          GOOGLE_OAUTH_CLIENT_ID: ''
+          GOOGLE_OAUTH_CLIENT_SECRET: ''
+          # Keep startup ingest off unless a test turns it on
+          DISABLE_STARTUP_INGEST: 'true'
+        run: |
+          make test-ci
+          echo "Keys directory after tests:"
+          ls -la keys/ || echo "No keys directory"
diff --git a/.gitignore b/.gitignore
index 484db58d..625097a6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,8 @@ wheels/
 1001*.pdf
 *.json
 !flows/*.json
+!src/tui/_assets/flows/*.json
+!src/tui/_assets/flows/components/*.json
 .DS_Store
 
 config/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 19b01709..6b8cd832 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -11,20 +11,48 @@ Thank you for your interest in contributing to OpenRAG! This guide will help you
 - Python 3.13+ with uv package manager
 - Node.js 18+ and npm
 
-### Environment Setup
+### Set up OpenRAG for development
+
+1. Set up your development environment.
 
 ```bash
-# Clone the repository
-git clone <repository-url>
+# Clone and setup environment
+git clone https://github.com/langflow-ai/openrag.git
 cd openrag
-
-# Setup development environment
 make setup  # Creates .env and installs dependencies
 ```
 
-### Configuration
+2. Configure the `.env` file with your API keys and credentials.
 
-Edit `.env` with your API keys and credentials. See the main README for required environment variables.
+```bash
+# Required
+OPENAI_API_KEY=your_openai_api_key
+OPENSEARCH_PASSWORD=your_secure_password
+LANGFLOW_SUPERUSER=admin
+LANGFLOW_SUPERUSER_PASSWORD=your_secure_password
+LANGFLOW_CHAT_FLOW_ID=your_chat_flow_id
+LANGFLOW_INGEST_FLOW_ID=your_ingest_flow_id
+NUDGES_FLOW_ID=your_nudges_flow_id
+```
+
+For extended configuration, including ingestion and optional variables, see [docs/reference/configuration.mdx](docs/docs/reference/configuration.mdx).
+
+3. Start OpenRAG.
+
+```bash
+# Full stack with GPU support
+make dev
+
+# Or CPU only
+make dev-cpu
+```
+
+Access the services:
+- **Frontend**: http://localhost:3000
+- **Backend API**: http://localhost:8000
+- **Langflow**: http://localhost:7860
+- **OpenSearch**: http://localhost:9200
+- **OpenSearch Dashboards**: http://localhost:5601
 
 ## 🔧 Development Commands
 
diff --git a/Dockerfile.langflow b/Dockerfile.langflow
index 71baf447..bdae1f70 100644
--- a/Dockerfile.langflow
+++ b/Dockerfile.langflow
@@ -1,4 +1,4 @@
-FROM langflowai/langflow-nightly:1.6.3.dev0
+FROM langflowai/langflow-nightly:1.6.3.dev1
 
 EXPOSE 7860
 
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..d0f089fc
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+recursive-include src/tui/_assets *
\ No newline at end of file
diff --git a/Makefile b/Makefile
index e8b08a1b..b30f77fc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,17 @@
 # OpenRAG Development Makefile
 # Provides easy commands for development workflow
 
-.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
+# Load .env (if present) as a makefile fragment. NOTE: make, not a shell, parses it, so '#' and '$' in values are special
+ifneq (,$(wildcard .env))
+  include .env
+  # Export every name that starts a KEY=VALUE line in .env so recipe child processes inherit it
+  export $(shell sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*\)=.*/\1/p' .env)
+endif
+
+.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install \
+       test test-integration test-ci \
+       backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os \
+       shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
 
 # Default target
 help:
@@ -32,14 +42,16 @@ help:
 	@echo "  shell-lf     - Shell into langflow container"
 	@echo ""
 	@echo "Testing:"
-	@echo "  test         - Run backend tests"
+	@echo "  test             - Run all backend tests"
+	@echo "  test-integration - Run integration tests (requires infra)"
+	@echo "  test-ci          - Start infra, run integration tests, tear down"
 	@echo "  lint         - Run linting checks"
 	@echo ""
 
 # Development environments
 dev:
 	@echo "🚀 Starting OpenRAG with GPU support..."
-	docker-compose up -d
+	docker compose up -d
 	@echo "✅ Services started!"
 	@echo "   Backend: http://localhost:8000"
 	@echo "   Frontend: http://localhost:3000"
@@ -49,7 +61,7 @@ dev:
 
 dev-cpu:
 	@echo "🚀 Starting OpenRAG with CPU only..."
-	docker-compose -f docker-compose-cpu.yml up -d
+	docker compose -f docker-compose-cpu.yml up -d
 	@echo "✅ Services started!"
 	@echo "   Backend: http://localhost:8000"
 	@echo "   Frontend: http://localhost:3000"
@@ -59,7 +71,7 @@ dev-cpu:
 
 dev-local:
 	@echo "🔧 Starting infrastructure only (for local development)..."
-	docker-compose up -d opensearch dashboards langflow
+	docker compose up -d opensearch dashboards langflow
 	@echo "✅ Infrastructure started!"
 	@echo "   Langflow: http://localhost:7860"
 	@echo "   OpenSearch: http://localhost:9200"
@@ -69,7 +81,7 @@ dev-local:
 
 infra:
 	@echo "🔧 Starting infrastructure services only..."
-	docker-compose up -d opensearch dashboards langflow
+	docker compose up -d opensearch dashboards langflow
 	@echo "✅ Infrastructure services started!"
 	@echo "   Langflow: http://localhost:7860"
 	@echo "   OpenSearch: http://localhost:9200"
@@ -86,15 +98,15 @@ infra-cpu:
 # Container management
 stop:
 	@echo "🛑 Stopping all containers..."
-	docker-compose down
-	docker-compose -f docker-compose-cpu.yml down 2>/dev/null || true
+	docker compose down
+	docker compose -f docker-compose-cpu.yml down 2>/dev/null || true
 
 restart: stop dev
 
 clean: stop
 	@echo "🧹 Cleaning up containers and volumes..."
-	docker-compose down -v --remove-orphans
-	docker-compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
+	docker compose down -v --remove-orphans
+	docker compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
 	docker system prune -f
 
 # Local development
@@ -114,7 +126,7 @@ install: install-be install-fe
 
 install-be:
 	@echo "📦 Installing backend dependencies..."
-	uv sync
+	uv sync --extra torch-cu128
 
 install-fe:
 	@echo "📦 Installing frontend dependencies..."
@@ -123,7 +135,7 @@ install-fe:
 # Building
 build:
 	@echo "🔨 Building Docker images..."
-	docker-compose build
+	docker compose build
 
 build-be:
 	@echo "🔨 Building backend image..."
@@ -136,41 +148,124 @@ build-fe:
 # Logging and debugging
 logs:
 	@echo "📋 Showing all container logs..."
-	docker-compose logs -f
+	docker compose logs -f
 
 logs-be:
 	@echo "📋 Showing backend logs..."
-	docker-compose logs -f openrag-backend
+	docker compose logs -f openrag-backend
 
 logs-fe:
 	@echo "📋 Showing frontend logs..."
-	docker-compose logs -f openrag-frontend
+	docker compose logs -f openrag-frontend
 
 logs-lf:
 	@echo "📋 Showing langflow logs..."
-	docker-compose logs -f langflow
+	docker compose logs -f langflow
 
 logs-os:
 	@echo "📋 Showing opensearch logs..."
-	docker-compose logs -f opensearch
+	docker compose logs -f opensearch
 
 # Shell access
 shell-be:
 	@echo "🐚 Opening shell in backend container..."
-	docker-compose exec openrag-backend /bin/bash
+	docker compose exec openrag-backend /bin/bash
 
 shell-lf:
 	@echo "🐚 Opening shell in langflow container..."
-	docker-compose exec langflow /bin/bash
+	docker compose exec langflow /bin/bash
 
 shell-os:
 	@echo "🐚 Opening shell in opensearch container..."
-	docker-compose exec opensearch /bin/bash
+	docker compose exec opensearch /bin/bash
 
 # Testing and quality
 test:
-	@echo "🧪 Running backend tests..."
-	uv run pytest
+	@echo "🧪 Running all backend tests..."
+	uv run pytest tests/ -v
+
+test-integration:
+	@echo "🧪 Running integration tests (requires infrastructure)..."
+	@echo "💡 Make sure to run 'make infra' first!"
+	uv run pytest tests/integration/ -v
+
+# CI-friendly integration test target: brings up infra, waits, runs tests, tears down (also when tests fail)
+test-ci:
+	@set -e; \
+	echo "Installing test dependencies..."; \
+	uv sync --group dev; \
+	if [ ! -f keys/private_key.pem ]; then \
+		echo "Generating RSA keys for JWT signing..."; \
+		uv run python -c "from src.main import generate_jwt_keys; generate_jwt_keys()"; \
+	else \
+		echo "RSA keys already exist, ensuring correct permissions..."; \
+		chmod 600 keys/private_key.pem 2>/dev/null || true; \
+		chmod 644 keys/public_key.pem 2>/dev/null || true; \
+	fi; \
+	echo "Cleaning up old containers and volumes..."; \
+	docker compose -f docker-compose-cpu.yml down -v 2>/dev/null || true; \
+	echo "Pulling latest images..."; \
+	docker compose -f docker-compose-cpu.yml pull; \
+	echo "Starting infra (OpenSearch + Dashboards + Langflow) with CPU containers"; \
+	docker compose -f docker-compose-cpu.yml up -d opensearch dashboards langflow; \
+	echo "Starting docling-serve..."; \
+	DOCLING_ENDPOINT=$$(uv run python scripts/docling_ctl.py start --port 5001 | grep "Endpoint:" | awk '{print $$2}'); \
+	echo "Docling-serve started at $$DOCLING_ENDPOINT"; \
+	echo "Waiting for backend OIDC endpoint..."; \
+	for i in $$(seq 1 60); do \
+		docker exec openrag-backend curl -s http://localhost:8000/.well-known/openid-configuration >/dev/null 2>&1 && break || sleep 2; \
+	done; \
+	echo "Waiting for OpenSearch security config to be fully applied..."; \
+	for i in $$(seq 1 60); do \
+		if docker logs os 2>&1 | grep -q "Security configuration applied successfully"; then \
+			echo "✓ Security configuration applied"; \
+			break; \
+		fi; \
+		sleep 2; \
+	done; \
+	echo "Verifying OIDC authenticator is active in OpenSearch..."; \
+	AUTHC_CONFIG=$$(curl -k -s -u "admin:$${OPENSEARCH_PASSWORD}" https://localhost:9200/_opendistro/_security/api/securityconfig 2>/dev/null || true); \
+	if echo "$$AUTHC_CONFIG" | grep -q "openid_auth_domain"; then \
+		echo "✓ OIDC authenticator configured"; \
+		echo "$$AUTHC_CONFIG" | grep -A 5 "openid_auth_domain"; \
+	else \
+		echo "✗ OIDC authenticator NOT found in security config!"; \
+		echo "Security config:"; \
+		echo "$$AUTHC_CONFIG" | head -50; \
+		exit 1; \
+	fi; \
+	echo "Waiting for Langflow..."; \
+	for i in $$(seq 1 60); do \
+		curl -s http://localhost:7860/ >/dev/null 2>&1 && break || sleep 2; \
+	done; \
+	echo "Waiting for docling-serve at $$DOCLING_ENDPOINT..."; \
+	for i in $$(seq 1 60); do \
+		curl -s "$${DOCLING_ENDPOINT}/health" >/dev/null 2>&1 && break || sleep 2; \
+	done; \
+	echo "Running integration tests"; \
+	LOG_LEVEL=$${LOG_LEVEL:-DEBUG} \
+	GOOGLE_OAUTH_CLIENT_ID="" \
+	GOOGLE_OAUTH_CLIENT_SECRET="" \
+	OPENSEARCH_HOST=localhost OPENSEARCH_PORT=9200 \
+	OPENSEARCH_USERNAME=admin OPENSEARCH_PASSWORD=$${OPENSEARCH_PASSWORD} \
+	DISABLE_STARTUP_INGEST=$${DISABLE_STARTUP_INGEST:-true} \
+	uv run pytest tests/integration -vv -s -o log_cli=true --log-cli-level=DEBUG \
+	&& TEST_RESULT=0 || TEST_RESULT=$$?; \
+	echo ""; \
+	echo "=== Post-test JWT diagnostics ==="; \
+	echo "Generating test JWT token..."; \
+	TEST_TOKEN=$$(uv run python -c "from src.session_manager import SessionManager, AnonymousUser; sm = SessionManager('test'); print(sm.create_jwt_token(AnonymousUser()))" 2>/dev/null || echo ""); \
+	if [ -n "$$TEST_TOKEN" ]; then \
+		echo "Testing JWT against OpenSearch..."; \
+		HTTP_CODE=$$(curl -k -s -w "%{http_code}" -o /tmp/os_diag.txt -H "Authorization: Bearer $$TEST_TOKEN" -H "Content-Type: application/json" https://localhost:9200/documents/_search -d '{"query":{"match_all":{}}}' 2>&1) || true; \
+		echo "HTTP $$HTTP_CODE: $$(cat /tmp/os_diag.txt | head -c 150)"; \
+	fi; \
+	echo "================================="; \
+	echo ""; \
+	echo "Tearing down infra"; \
+	uv run python scripts/docling_ctl.py stop || true; \
+	docker compose -f docker-compose-cpu.yml down -v || true; \
+	exit $$TEST_RESULT
 
 lint:
 	@echo "🔍 Running linting checks..."
@@ -180,19 +275,19 @@ lint:
 # Service status
 status:
 	@echo "📊 Container status:"
-	@docker-compose ps 2>/dev/null || echo "No containers running"
+	@docker compose ps 2>/dev/null || echo "No containers running"
 
 health:
 	@echo "🏥 Health check:"
 	@echo "Backend: $$(curl -s http://localhost:8000/health 2>/dev/null || echo 'Not responding')"
 	@echo "Langflow: $$(curl -s http://localhost:7860/health 2>/dev/null || echo 'Not responding')"
-	@echo "OpenSearch: $$(curl -s -k -u admin:$(shell grep OPENSEARCH_PASSWORD .env | cut -d= -f2) https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
+	@echo "OpenSearch: $$(curl -s -k -u admin:$${OPENSEARCH_PASSWORD} https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
 
 # Database operations
 db-reset:
 	@echo "🗄️ Resetting OpenSearch indices..."
-	curl -X DELETE "http://localhost:9200/documents" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
-	curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
+	curl -k -X DELETE "https://localhost:9200/documents" -u "admin:$${OPENSEARCH_PASSWORD}" || true
+	curl -k -X DELETE "https://localhost:9200/knowledge_filters" -u "admin:$${OPENSEARCH_PASSWORD}" || true
 	@echo "Indices reset. Restart backend to recreate."
 
 # Flow management
@@ -215,4 +310,4 @@ setup:
 	@echo "⚙️ Setting up development environment..."
 	@if [ ! -f .env ]; then cp .env.example .env && echo "📝 Created .env from template"; fi
 	@$(MAKE) install
-	@echo "✅ Setup complete! Run 'make dev' to start."
\ No newline at end of file
+	@echo "✅ Setup complete! Run 'make dev' to start."
diff --git a/README.md b/README.md
index a0178f28..a7abbbe6 100644
--- a/README.md
+++ b/README.md
@@ -2,20 +2,6 @@
 
 # OpenRAG
 
-</div>
-<div align="center">
-  <a href="#quick-start" style="color: #0366d6;">🚀 Quick Start</a> &nbsp;&nbsp;|&nbsp;&nbsp;
-  <a href="#tui-interface" style="color: #0366d6;">💻 TUI Interface</a> &nbsp;&nbsp;|&nbsp;&nbsp;
-  <a href="#docker-deployment" style="color: #0366d6;">🐳 Docker Deployment</a> &nbsp;&nbsp;|&nbsp;&nbsp;
-  <a href="#development" style="color: #0366d6;">⚙️ Development</a> &nbsp;&nbsp;|&nbsp;&nbsp;
-  <a href="#troubleshooting" style="color: #0366d6;">🔧 Troubleshooting</a>
-</div>
-
-
-
-OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration. [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/phact/openrag)
-
-
 <div align="center">
   <a href="https://github.com/langflow-ai/langflow"><img src="https://img.shields.io/badge/Langflow-1C1C1E?style=flat&logo=langflow" alt="Langflow"></a>
   &nbsp;&nbsp;
@@ -24,144 +10,124 @@ OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables
   <a href="https://github.com/encode/starlette"><img src="https://img.shields.io/badge/Starlette-009639?style=flat&logo=fastapi&logoColor=white" alt="Starlette"></a>
   &nbsp;&nbsp;
   <a href="https://github.com/vercel/next.js"><img src="https://img.shields.io/badge/Next.js-000000?style=flat&logo=next.js&logoColor=white" alt="Next.js"></a>
-
+  &nbsp;&nbsp;
+  <a href="https://deepwiki.com/phact/openrag"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
 </div>
 
+OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration.
 
+</div>
+<div align="center">
+  <a href="#quickstart" style="color: #0366d6;">Quickstart</a> &nbsp;&nbsp;|&nbsp;&nbsp;
+  <a href="#tui-interface" style="color: #0366d6;">TUI Interface</a> &nbsp;&nbsp;|&nbsp;&nbsp;
+  <a href="#docker-deployment" style="color: #0366d6;">Docker Deployment</a> &nbsp;&nbsp;|&nbsp;&nbsp;
+  <a href="#development" style="color: #0366d6;">Development</a> &nbsp;&nbsp;|&nbsp;&nbsp;
+  <a href="#troubleshooting" style="color: #0366d6;">Troubleshooting</a>
+</div>
 
+## Quickstart
 
+Use the OpenRAG Terminal User Interface (TUI) to manage your OpenRAG installation without complex command-line operations.
 
+To launch OpenRAG with the TUI, do the following:
 
-## 🚀 Quick Start
+1. Clone the OpenRAG repository.
+    ```bash
+    git clone https://github.com/langflow-ai/openrag.git
+    cd openrag
+    ```
 
-### Prerequisites
+2. To start the TUI, from the repository root, run:
+    ```bash
+    # Install dependencies first
+    uv sync
+    
+    # Launch the TUI
+    uv run openrag
+    ```
 
-- Docker or Podman with Compose installed
-- Make (for development commands)
+    The TUI opens and guides you through OpenRAG setup.
 
-### 1. Environment Setup
+For the full TUI guide, see [TUI](docs/docs/get-started/tui.mdx).
 
-```bash
-# Clone and setup environment
-git clone https://github.com/langflow-ai/openrag.git
-cd openrag
-make setup  # Creates .env and installs dependencies
-```
+## Docker Deployment
 
-### 2. Configure Environment
+If you prefer to use Docker to run OpenRAG, the repository includes two Docker Compose `.yml` files.
+They deploy the same applications and containers, but to different environments.
 
-Edit `.env` with your API keys and credentials:
+- [`docker-compose.yml`](https://github.com/langflow-ai/openrag/blob/main/docker-compose.yml) is an OpenRAG deployment for environments with GPU support. GPU support requires an NVIDIA GPU with CUDA support and compatible NVIDIA drivers installed on the OpenRAG host machine. 
 
-```bash
-# Required
-OPENAI_API_KEY=your_openai_api_key
-OPENSEARCH_PASSWORD=your_secure_password
-LANGFLOW_SUPERUSER=admin
-LANGFLOW_SUPERUSER_PASSWORD=your_secure_password
-LANGFLOW_CHAT_FLOW_ID=your_chat_flow_id
-LANGFLOW_INGEST_FLOW_ID=your_ingest_flow_id
-NUDGES_FLOW_ID=your_nudges_flow_id
-```
-See extended configuration, including ingestion and optional variables: [docs/reference/configuration.md](docs/docs/reference/configuration.md)
-### 3. Start OpenRAG
+- [`docker-compose-cpu.yml`](https://github.com/langflow-ai/openrag/blob/main/docker-compose-cpu.yml) is a CPU-only version of OpenRAG for systems without GPU support. Use this Docker compose file for environments where GPU drivers aren't available.
 
-```bash
-# Full stack with GPU support
-make dev
+Both Docker deployments require `docling serve` to be running on port `5001` on the host machine. Running it on the host enables [Mac MLX](https://opensource.apple.com/projects/mlx/) support for document processing. Installing OpenRAG with the TUI starts `docling serve` automatically, but for a Docker deployment you must start the `docling serve` process manually.
 
-# Or CPU only
-make dev-cpu
-```
+To deploy OpenRAG with Docker:
 
-Access the services:
-- **Frontend**: http://localhost:3000
-- **Backend API**: http://localhost:8000
-- **Langflow**: http://localhost:7860
-- **OpenSearch**: http://localhost:9200
-- **OpenSearch Dashboards**: http://localhost:5601
+1. Clone the OpenRAG repository.
+    ```bash
+    git clone https://github.com/langflow-ai/openrag.git
+    cd openrag
+    ```
 
-## 🖥️ TUI Interface
+2. Install dependencies.
+    ```bash
+    uv sync
+    ```
 
-OpenRAG includes a powerful Terminal User Interface (TUI) for easy setup, configuration, and monitoring. The TUI provides a user-friendly way to manage your OpenRAG installation without complex command-line operations.
+3. Start `docling serve` on the host machine.
+    ```bash
+    uv run python scripts/docling_ctl.py start --port 5001
+    ```
+    
+4. Confirm `docling serve` is running.
+    ```bash
+    uv run python scripts/docling_ctl.py status
+    ```
 
-![OpenRAG TUI Interface](assets/OpenRAG_TUI_2025-09-10T13_04_11_757637.svg)
+    Successful result:
+    ```bash
+    Status: running
+    Endpoint: http://127.0.0.1:5001
+    Docs: http://127.0.0.1:5001/docs
+    PID: 27746
+    ```
 
-### Launching the TUI
+5. Build and start all services.
 
-```bash
-# Install dependencies first
-uv sync
+    For the GPU-accelerated deployment, run:
+    ```bash
+    docker compose build
+    docker compose up -d
+    ```
 
-# Launch the TUI
-uv run openrag
-```
+    For environments without GPU support, run: 
+    ```bash
+    docker compose -f docker-compose-cpu.yml up -d
+    ```
 
-### TUI Features
+   The OpenRAG Docker Compose file starts five containers:
+   | Container Name | Default Address | Purpose |
+   |---|---|---|
+   | OpenRAG Backend | http://localhost:8000 | Starlette API server and core functionality. |
+   | OpenRAG Frontend | http://localhost:3000 | React web interface for users. |
+   | Langflow | http://localhost:7860 | AI workflow engine and flow management. |
+   | OpenSearch | http://localhost:9200 | Vector database for document storage. |
+   | OpenSearch Dashboards | http://localhost:5601 | Database administration interface. |
 
-See the full TUI guide for features, navigation, and benefits: [docs/get-started/tui.mdx](docs/docs/get-started/tui.mdx)
+6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](docs/docs/get-started/quickstart.mdx).
 
+    To stop `docling serve`, run:
+    
+    ```bash
+    uv run python scripts/docling_ctl.py stop
+    ```
 
+For more information, see [Deploy with Docker](docs/docs/get-started/docker.mdx).
 
+## Troubleshooting
 
-## 🐳 Docker Deployment
+For common issues and fixes, see [Troubleshoot](docs/docs/support/troubleshoot.mdx).
 
-### Standard Deployment
+## Development
 
-```bash
-# Build and start all services
-docker compose build
-docker compose up -d
-```
-
-### CPU-Only Deployment
-
-For environments without GPU support:
-
-```bash
-docker compose -f docker-compose-cpu.yml up -d
-```
-
-More deployment commands and tips: [docs/get-started/docker.mdx](docs/docs/get-started/docker.mdx)
-
-## 🔧 Troubleshooting
-
-### Podman on macOS
-
-If using Podman on macOS, you may need to increase VM memory:
-
-```bash
-podman machine stop
-podman machine rm
-podman machine init --memory 8192   # 8 GB example
-podman machine start
-```
-
-### Common Issues
-
-See common issues and fixes: [docs/support/troubleshoot.mdx](docs/docs/reference/troubleshoot.mdx)
-
-
-
-## 🛠️ Development
-
-For developers wanting to contribute to OpenRAG or set up a development environment, please see our comprehensive development guide:
-
-**[📚 See CONTRIBUTING.md for detailed development instructions](CONTRIBUTING.md)**
-
-The contributing guide includes:
-- Complete development environment setup
-- Local development workflows  
-- Testing and debugging procedures
-- Code style guidelines
-- Architecture overview
-- Pull request guidelines
-
-### Quick Development Commands
-
-```bash
-make help                    # See all available commands
-make setup                   # Initial development setup
-make infra                   # Start infrastructure services
-make backend                 # Run backend locally
-make frontend                # Run frontend locally
-```
\ No newline at end of file
+For developers wanting to contribute to OpenRAG or set up a development environment, see [CONTRIBUTING.md](CONTRIBUTING.md).
\ No newline at end of file
diff --git a/docker-compose-cpu.yml b/docker-compose-cpu.yml
index 0c09254a..c0af8a01 100644
--- a/docker-compose-cpu.yml
+++ b/docker-compose-cpu.yml
@@ -74,7 +74,7 @@ services:
     volumes:
       - ./documents:/app/documents:Z
       - ./keys:/app/keys:Z
-      - ./flows:/app/flows:Z
+      - ./flows:/app/flows:U,z
 
   openrag-frontend:
     image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
@@ -91,7 +91,7 @@ services:
 
   langflow:
     volumes:
-      - ./flows:/app/flows:Z
+      - ./flows:/app/flows:U,z
     image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
     # build:
     #   context: .
@@ -108,6 +108,7 @@ services:
       - OWNER_NAME=None
       - OWNER_EMAIL=None
       - CONNECTOR_TYPE=system
+      - CONNECTOR_TYPE_URL=url
       - OPENRAG-QUERY-FILTER="{}"
       - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
       - FILENAME=None
diff --git a/docker-compose.yml b/docker-compose.yml
index be9bcbc9..df8a3228 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -73,7 +73,7 @@ services:
     volumes:
       - ./documents:/app/documents:Z
       - ./keys:/app/keys:Z
-      - ./flows:/app/flows:z
+      - ./flows:/app/flows:U,z
     gpus: all
 
   openrag-frontend:
@@ -81,7 +81,6 @@ services:
     # build:
     #   context: .
     #   dockerfile: Dockerfile.frontend
-      #dockerfile: Dockerfile.frontend
     container_name: openrag-frontend
     depends_on:
       - openrag-backend
@@ -92,7 +91,7 @@ services:
 
   langflow:
     volumes:
-      - ./flows:/app/flows:z
+      - ./flows:/app/flows:U,z
     image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
     # build:
     #   context: .
@@ -109,6 +108,7 @@ services:
       - OWNER_NAME=None
       - OWNER_EMAIL=None
       - CONNECTOR_TYPE=system
+      - CONNECTOR_TYPE_URL=url
       - OPENRAG-QUERY-FILTER="{}"
       - FILENAME=None
       - MIMETYPE=None
diff --git a/docs/docs/_partial-external-preview.mdx b/docs/docs/_partial-external-preview.mdx
deleted file mode 100644
index 8563720c..00000000
--- a/docs/docs/_partial-external-preview.mdx
+++ /dev/null
@@ -1,4 +0,0 @@
-:::info
-OpenRAG is is currently in public preview.
-Development is ongoing, and the features and functionality are subject to change.
-:::
\ No newline at end of file
diff --git a/docs/docs/core-components/agents.mdx b/docs/docs/core-components/agents.mdx
index 3ee4617b..ea4c05bd 100644
--- a/docs/docs/core-components/agents.mdx
+++ b/docs/docs/core-components/agents.mdx
@@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 OpenRAG leverages Langflow's Agent component to power the OpenRAG OpenSearch Agent flow.
 
@@ -34,11 +31,11 @@ In an agentic context, tools are functions that the agent can run to perform tas
 
 </details>
 
-## Use the OpenRAG OpenSearch Agent flow
+## Use the OpenRAG OpenSearch Agent flow {#flow}
 
 If you've chatted with your knowledge in OpenRAG, you've already experienced the OpenRAG OpenSearch Agent chat flow.
 To switch OpenRAG over to the [Langflow visual editor](https://docs.langflow.org/concepts-overview) and view the OpenRAG OpenSearch Agentflow, click <Icon name="Settings2" aria-hidden="true"/> **Settings**, and then click **Edit in Langflow**.
-This flow contains seven components connected together to chat with your data:
+This flow contains eight components connected together to chat with your data:
 
 * The [**Agent** component](https://docs.langflow.org/agents) orchestrates the entire flow by deciding when to search the knowledge base, how to formulate search queries, and how to combine retrieved information with the user's question to generate a comprehensive response.
 The **Agent** behaves according to the prompt in the **Agent Instructions** field.
@@ -49,6 +46,7 @@ The **Agent** behaves according to the prompt in the **Agent Instructions** fiel
 * The [**Text Input** component](https://docs.langflow.org/components-io) is populated with the global variable `OPENRAG-QUERY-FILTER`. 
 This filter is the [Knowledge filter](/knowledge#create-knowledge-filters), and filters which knowledge sources to search through.
 * The **Agent** component's Output port is connected to the [**Chat Output** component](https://docs.langflow.org/components-io), which returns the final response to the user or application.
+* An [**MCP Tools** component](https://docs.langflow.org/mcp-client) is connected to the Agent's **Tools** port. This component calls the [OpenSearch URL Ingestion flow](/ingestion#url-flow), which Langflow uses as an MCP server to fetch content from URLs and store it in OpenSearch.
 
 <PartialModifyFlows />
 
diff --git a/docs/docs/core-components/ingestion.mdx b/docs/docs/core-components/ingestion.mdx
index d3ce81b0..0a9df0bc 100644
--- a/docs/docs/core-components/ingestion.mdx
+++ b/docs/docs/core-components/ingestion.mdx
@@ -7,9 +7,6 @@ import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 OpenRAG uses [Docling](https://docling-project.github.io/docling/) for its document ingestion pipeline.
 More specifically, OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), which starts a `docling-serve` process on your local machine and runs Docling ingestion through an API service.
@@ -50,4 +47,31 @@ If you want to use OpenRAG's built-in pipeline instead of Docling serve, set `DI
 
 The built-in pipeline still uses the Docling processor, but uses it directly without the Docling Serve API.
 
-For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
\ No newline at end of file
+For more information, see [`processors.py` in the OpenRAG repository](https://github.com/langflow-ai/openrag/blob/main/src/models/processors.py#L58).
+
+## Knowledge ingestion flows
+
+[Flows](https://docs.langflow.org/concepts-overview) in Langflow are functional representations of application workflows, with multiple [component](https://docs.langflow.org/concepts-components) nodes connected as single steps in a workflow.
+
+The **OpenSearch Ingestion** flow is the default knowledge ingestion flow in OpenRAG: when you **Add Knowledge** in OpenRAG, you run the OpenSearch Ingestion flow in the background. The flow uses **Docling Serve** to import and process documents.
+
+This flow contains ten components connected together to process and store documents in your knowledge base.
+
+* The [**Docling Serve** component](https://docs.langflow.org/bundles-docling) processes input documents by connecting to your instance of Docling Serve.
+* The [**Export DoclingDocument** component](https://docs.langflow.org/components-docling) exports the processed DoclingDocument to markdown format with image export mode set to placeholder. This conversion transforms the structured document data into a standardized format for further processing.
+* Three [**DataFrame Operations** components](https://docs.langflow.org/components-processing#dataframe-operations) sequentially add the metadata columns `filename`, `file_size`, and `mimetype` to the document data.
+* The [**Split Text** component](https://docs.langflow.org/components-processing#split-text) splits the processed text into chunks with a chunk size of 1000 characters and an overlap of 200 characters.
+* Four **Secret Input** components provide secure access to configuration variables: `CONNECTOR_TYPE`, `OWNER`, `OWNER_EMAIL`, and `OWNER_NAME`. These are runtime variables populated from OAuth login.
+* The **Create Data** component combines the secret inputs into a structured data object that will be associated with the document embeddings.
+* The [**Embedding Model** component](https://docs.langflow.org/components-embedding-models) generates vector embeddings using OpenAI's `text-embedding-3-small` model. The embedding model is selected during [Application onboarding](/install#application-onboarding) and cannot be changed.
+* The [**OpenSearch** component](https://docs.langflow.org/bundles-elastic#opensearch) stores the processed documents and their embeddings in the `documents` index at `https://opensearch:9200`. By default, the component is authenticated with a JWT token, but you can also select `basic` auth mode, and enter your OpenSearch admin username and password.
+
+<PartialModifyFlows />
+
+### OpenSearch URL Ingestion flow {#url-flow}
+
+An additional knowledge ingestion flow is included in OpenRAG, where it is used as an MCP tool by the [**OpenRAG OpenSearch Agent** flow](/agents#flow).
+The agent calls this component to fetch web content, and the results are ingested into OpenSearch.
+
+For more on using MCP clients in Langflow, see [MCP clients](https://docs.langflow.org/mcp-client).\
+To connect additional MCP servers to the MCP client, see [Connect to MCP servers from your application](https://docs.langflow.org/mcp-tutorial).
\ No newline at end of file
diff --git a/docs/docs/core-components/knowledge.mdx b/docs/docs/core-components/knowledge.mdx
index d2a74ca4..9b699a4c 100644
--- a/docs/docs/core-components/knowledge.mdx
+++ b/docs/docs/core-components/knowledge.mdx
@@ -7,17 +7,23 @@ import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 OpenRAG uses [OpenSearch](https://docs.opensearch.org/latest/) for its vector-backed knowledge store.
 This is a specialized database for storing and retrieving embeddings, which helps your Agent efficiently find relevant information.
 OpenSearch provides powerful hybrid search capabilities with enterprise-grade security and multi-tenancy support.
 
+## Authentication and document access {#auth}
+
+OpenRAG supports two authentication modes based on how you [install OpenRAG](/install), and which mode you choose affects document access.
+
+**No-auth mode (Basic Setup)**: This mode uses a single anonymous JWT token for OpenSearch authentication, so documents uploaded to the `documents` index by one user are visible to all other users on the OpenRAG server.
+
+**OAuth mode (Advanced Setup)**: Each OpenRAG user is granted a JWT token, and each document is tagged with user ownership. Documents are filtered by user ownership, ensuring users only see documents they uploaded or have access to. 
+
 ## Ingest knowledge
 
 OpenRAG supports knowledge ingestion through direct file uploads and OAuth connectors.
+To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
 
 ### Direct file ingestion
 
@@ -78,18 +84,6 @@ You can select multiples.
 The ingestion process may take some time, depending on the size of your documents.
 4. When ingestion is complete, your documents are available in the Knowledge screen.
 
-### Sync cloud connectors
-
-Your connected data sources are found in the <Icon name="Settings2" aria-hidden="true"/> **Settings** page.
-
-When you click **Sync Now** for a connected cloud service like Google Drive, OpenRAG scans your connected Google Drive account to find files that match your sync criteria. Sync criteria are controlled in **Sync Settings** on the same page. You can sync all files, or select a maximum number of files to sync.
-
-For each file found, OpenRAG downloads, converts, and embeds the processed content into OpenSearch.
-
-You can monitor the sync progress in the <Icon name="Bell" aria-hidden="true"/> **Tasks** sidebar.
-
-Once processing is complete, the synced documents become available in your knowledge base and can be searched through the chat interface or Knowledge page.
-
 ## Explore knowledge
 
 The **Knowledge** page lists the documents OpenRAG has ingested into the OpenSearch vector database's `documents` index.
@@ -101,10 +95,6 @@ Documents are processed with the default **Knowledge Ingest** flow, so if you wa
 
 <PartialModifyFlows />
 
-### Knowledge ingestion settings
-
-To configure the knowledge ingestion pipeline parameters, see [Docling Ingestion](/ingestion).
-
 ## Create knowledge filters
 
 OpenRAG includes a knowledge filter system for organizing and managing document collections.
diff --git a/docs/docs/get-started/docker.mdx b/docs/docs/get-started/docker.mdx
index f7ec730b..eee2e866 100644
--- a/docs/docs/get-started/docker.mdx
+++ b/docs/docs/get-started/docker.mdx
@@ -4,9 +4,6 @@ slug: /get-started/docker
 ---
 
 import PartialOnboarding from '@site/docs/_partial-onboarding.mdx';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 There are two different Docker Compose files.
 They deploy the same applications and containers, but to different environments.
diff --git a/docs/docs/get-started/install.mdx b/docs/docs/get-started/install.mdx
index 1759e813..ff9872aa 100644
--- a/docs/docs/get-started/install.mdx
+++ b/docs/docs/get-started/install.mdx
@@ -6,9 +6,6 @@ slug: /install
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import PartialOnboarding from '@site/docs/_partial-onboarding.mdx'; 
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 [Install the OpenRAG Python wheel](#install-python-wheel), and then run the [OpenRAG Terminal User Interface(TUI)](#setup) to start your OpenRAG deployment with a guided setup process.
 
@@ -65,13 +62,15 @@ The OpenRAG wheel installs the Terminal User Interface (TUI) for configuring and
 ## Set up OpenRAG with the TUI {#setup}
 
 The TUI creates a `.env` file in your OpenRAG directory root and starts OpenRAG.
+If the TUI detects a `.env` file in the OpenRAG root directory, it sources any variables from the `.env` file.
+If the TUI detects OAuth credentials, it enforces the **Advanced Setup** path.
 
-**Basic Setup** generates all of the required values except the OpenAI API key.
-**Basic Setup** does not set up OAuth connections for ingestion from Google Drive, OneDrive, or AWS.
+**Basic Setup** generates all of the required values for OpenRAG except the OpenAI API key.
+**Basic Setup** does not set up OAuth connections for ingestion from cloud providers.
 For OAuth setup, use **Advanced Setup**.
 
-If the TUI detects OAuth credentials, it enforces the **Advanced Setup** path.
-If the TUI detects a `.env` file in the OpenRAG root directory, it will source any variables from the `.env` file.
+**Basic Setup** and **Advanced Setup** enforce the same authentication settings for the Langflow server, but manage document access differently. For more information, see [Authentication and document access](/knowledge#auth).
+
 <Tabs groupId="Setup method">
   <TabItem value="Basic setup" label="Basic setup" default>
 
@@ -90,6 +89,7 @@ If the TUI detects a `.env` file in the OpenRAG root directory, it will source a
    7. Continue with [Application Onboarding](#application-onboarding).
   </TabItem>
   <TabItem value="Advanced setup" label="Advanced setup">
+
    1. To install OpenRAG with **Advanced Setup**, click **Advanced Setup** or press <kbd>2</kbd>. 
    2. Click **Generate Passwords** to generate passwords for OpenSearch and Langflow.
    3. Paste your OpenAI API key in the OpenAI API key field.
diff --git a/docs/docs/get-started/quickstart.mdx b/docs/docs/get-started/quickstart.mdx
index 838ad006..c2f4b3a5 100644
--- a/docs/docs/get-started/quickstart.mdx
+++ b/docs/docs/get-started/quickstart.mdx
@@ -6,9 +6,6 @@ slug: /quickstart
 import Icon from "@site/src/components/icon/icon";
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 Get started with OpenRAG by loading your knowledge, swapping out your language model, and then chatting with the OpenRAG API.
 
diff --git a/docs/docs/get-started/tui.mdx b/docs/docs/get-started/tui.mdx
index f3cfe51e..0a27a1e8 100644
--- a/docs/docs/get-started/tui.mdx
+++ b/docs/docs/get-started/tui.mdx
@@ -3,10 +3,6 @@ title: Terminal User Interface (TUI) commands
 slug: /get-started/tui
 ---
 
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
-
 The OpenRAG Terminal User Interface (TUI) allows you to set up, configure, and monitor your OpenRAG deployment directly from the terminal, on any operating system.
 
 ![OpenRAG TUI Interface](@site/static/img/OpenRAG_TUI_2025-09-10T13_04_11_757637.svg)
diff --git a/docs/docs/get-started/what-is-openrag.mdx b/docs/docs/get-started/what-is-openrag.mdx
index 18c01482..7d2340d0 100644
--- a/docs/docs/get-started/what-is-openrag.mdx
+++ b/docs/docs/get-started/what-is-openrag.mdx
@@ -3,10 +3,6 @@ title: What is OpenRAG?
 slug: /
 ---
 
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
-
 OpenRAG is an open-source package for building agentic RAG systems.
 It supports integration with a wide range of orchestration tools, vector databases, and LLM providers.
 
diff --git a/docs/docs/support/troubleshoot.mdx b/docs/docs/support/troubleshoot.mdx
index 9946db38..93599d04 100644
--- a/docs/docs/support/troubleshoot.mdx
+++ b/docs/docs/support/troubleshoot.mdx
@@ -5,9 +5,6 @@ slug: /support/troubleshoot
 
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
-import PartialExternalPreview from '@site/docs/_partial-external-preview.mdx';
-
-<PartialExternalPreview />
 
 This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.
 
diff --git a/docs/static/img/opensearch-agent-flow.png b/docs/static/img/opensearch-agent-flow.png
index d201aef9..7382fcd1 100644
Binary files a/docs/static/img/opensearch-agent-flow.png and b/docs/static/img/opensearch-agent-flow.png differ
diff --git a/flows/openrag_agent.json b/flows/openrag_agent.json
index c08a305c..bb02b425 100644
--- a/flows/openrag_agent.json
+++ b/flows/openrag_agent.json
@@ -170,6 +170,31 @@
         "sourceHandle": "{œdataTypeœ:œTextInputœ,œidœ:œTextInput-aHsQbœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}",
         "target": "OpenSearch-iYfjf",
         "targetHandle": "{œfieldNameœ:œfilter_expressionœ,œidœ:œOpenSearch-iYfjfœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}"
+      },
+      {
+        "data": {
+          "sourceHandle": {
+            "dataType": "MCP",
+            "id": "MCP-7EY21",
+            "name": "component_as_tool",
+            "output_types": [
+              "Tool"
+            ]
+          },
+          "targetHandle": {
+            "fieldName": "tools",
+            "id": "Agent-crjWf",
+            "inputTypes": [
+              "Tool"
+            ],
+            "type": "other"
+          }
+        },
+        "id": "xy-edge__MCP-7EY21{œdataTypeœ:œMCPœ,œidœ:œMCP-7EY21œ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}-Agent-crjWf{œfieldNameœ:œtoolsœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}",
+        "source": "MCP-7EY21",
+        "sourceHandle": "{œdataTypeœ:œMCPœ,œidœ:œMCP-7EY21œ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}",
+        "target": "Agent-crjWf",
+        "targetHandle": "{œfieldNameœ:œtoolsœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}"
       }
     ],
     "nodes": [
@@ -730,7 +755,7 @@
             ],
             "frozen": false,
             "icon": "OpenSearch",
-            "last_updated": "2025-10-04T05:41:33.344Z",
+            "last_updated": "2025-10-06T15:23:50.339Z",
             "legacy": false,
             "lf_version": "1.6.0",
             "metadata": {
@@ -1384,7 +1409,7 @@
             ],
             "frozen": false,
             "icon": "binary",
-            "last_updated": "2025-10-04T05:41:33.345Z",
+            "last_updated": "2025-10-06T15:23:50.341Z",
             "legacy": false,
             "lf_version": "1.6.0",
             "metadata": {
@@ -1709,7 +1734,7 @@
             ],
             "frozen": false,
             "icon": "bot",
-            "last_updated": "2025-10-04T05:41:33.399Z",
+            "last_updated": "2025-10-06T15:23:50.396Z",
             "legacy": false,
             "lf_version": "1.6.0",
             "metadata": {
@@ -2248,7 +2273,7 @@
             ],
             "frozen": false,
             "icon": "brain-circuit",
-            "last_updated": "2025-10-04T05:41:33.347Z",
+            "last_updated": "2025-10-06T15:23:50.343Z",
             "legacy": false,
             "lf_version": "1.6.0",
             "metadata": {
@@ -2551,10 +2576,258 @@
         },
         "selected": false,
         "type": "genericNode"
+      },
+      {
+        "data": {
+          "id": "MCP-7EY21",
+          "node": {
+            "base_classes": [
+              "DataFrame"
+            ],
+            "beta": false,
+            "category": "MCP",
+            "conditional_paths": [],
+            "custom_fields": {},
+            "description": "Connect to an MCP server to use its tools.",
+            "display_name": "MCP Tools",
+            "documentation": "https://docs.langflow.org/mcp-client",
+            "edited": false,
+            "field_order": [
+              "mcp_server",
+              "use_cache",
+              "tool",
+              "tool_placeholder"
+            ],
+            "frozen": false,
+            "icon": "Mcp",
+            "key": "mcp_lf-starter_project",
+            "last_updated": "2025-10-06T15:23:56.578Z",
+            "legacy": false,
+            "mcpServerName": "lf-starter_project",
+            "metadata": {
+              "code_hash": "756d1e10d0ca",
+              "dependencies": {
+                "dependencies": [
+                  {
+                    "name": "langchain_core",
+                    "version": "0.3.77"
+                  },
+                  {
+                    "name": "lfx",
+                    "version": null
+                  },
+                  {
+                    "name": "langflow",
+                    "version": null
+                  }
+                ],
+                "total_dependencies": 3
+              },
+              "module": "lfx.components.agents.mcp_component.MCPToolsComponent"
+            },
+            "minimized": false,
+            "output_types": [],
+            "outputs": [
+              {
+                "allows_loop": false,
+                "cache": true,
+                "display_name": "Toolset",
+                "group_outputs": false,
+                "hidden": null,
+                "method": "to_toolkit",
+                "name": "component_as_tool",
+                "options": null,
+                "required_inputs": null,
+                "selected": "Tool",
+                "tool_mode": true,
+                "types": [
+                  "Tool"
+                ],
+                "value": "__UNDEFINED__"
+              }
+            ],
+            "pinned": false,
+            "template": {
+              "_type": "Component",
+              "code": {
+                "advanced": true,
+                "dynamic": true,
+                "fileTypes": [],
+                "file_path": "",
+                "info": "",
+                "list": false,
+                "load_from_db": false,
+                "multiline": true,
+                "name": "code",
+                "password": false,
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "type": "code",
+                "value": "from __future__ import annotations\n\nimport asyncio\nimport uuid\nfrom typing import Any\n\nfrom langchain_core.tools import StructuredTool  # noqa: TC002\n\nfrom lfx.base.agents.utils import maybe_unflatten_dict, safe_cache_get, safe_cache_set\nfrom lfx.base.mcp.util import MCPSseClient, MCPStdioClient, create_input_schema_from_json_schema, update_tools\nfrom lfx.custom.custom_component.component_with_cache import ComponentWithCache\nfrom lfx.inputs.inputs import InputTypes  # noqa: TC001\nfrom lfx.io import BoolInput, DropdownInput, McpInput, MessageTextInput, Output\nfrom lfx.io.schema import flatten_schema, schema_to_langflow_inputs\nfrom lfx.log.logger import logger\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.services.deps import get_settings_service, get_storage_service, session_scope\n\n\nclass MCPToolsComponent(ComponentWithCache):\n    schema_inputs: list = []\n    tools: list[StructuredTool] = []\n    _not_load_actions: bool = False\n    _tool_cache: dict = {}\n    _last_selected_server: str | None = None  # Cache for the last selected server\n\n    def __init__(self, **data) -> None:\n        super().__init__(**data)\n        # Initialize cache keys to avoid CacheMiss when accessing them\n        self._ensure_cache_structure()\n\n        # Initialize clients with access to the component cache\n        self.stdio_client: MCPStdioClient = MCPStdioClient(component_cache=self._shared_component_cache)\n        self.sse_client: MCPSseClient = MCPSseClient(component_cache=self._shared_component_cache)\n\n    def _ensure_cache_structure(self):\n        \"\"\"Ensure the cache has the required structure.\"\"\"\n        # Check if servers key exists and is not CacheMiss\n        servers_value = safe_cache_get(self._shared_component_cache, \"servers\")\n        if servers_value is None:\n            safe_cache_set(self._shared_component_cache, \"servers\", {})\n\n        # Check if 
last_selected_server key exists and is not CacheMiss\n        last_server_value = safe_cache_get(self._shared_component_cache, \"last_selected_server\")\n        if last_server_value is None:\n            safe_cache_set(self._shared_component_cache, \"last_selected_server\", \"\")\n\n    default_keys: list[str] = [\n        \"code\",\n        \"_type\",\n        \"tool_mode\",\n        \"tool_placeholder\",\n        \"mcp_server\",\n        \"tool\",\n        \"use_cache\",\n    ]\n\n    display_name = \"MCP Tools\"\n    description = \"Connect to an MCP server to use its tools.\"\n    documentation: str = \"https://docs.langflow.org/mcp-client\"\n    icon = \"Mcp\"\n    name = \"MCPTools\"\n\n    inputs = [\n        McpInput(\n            name=\"mcp_server\",\n            display_name=\"MCP Server\",\n            info=\"Select the MCP Server that will be used by this component\",\n            real_time_refresh=True,\n        ),\n        BoolInput(\n            name=\"use_cache\",\n            display_name=\"Use Cached Server\",\n            info=(\n                \"Enable caching of MCP Server and tools to improve performance. 
\"\n                \"Disable to always fetch fresh tools and server updates.\"\n            ),\n            value=False,\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"tool\",\n            display_name=\"Tool\",\n            options=[],\n            value=\"\",\n            info=\"Select the tool to execute\",\n            show=False,\n            required=True,\n            real_time_refresh=True,\n        ),\n        MessageTextInput(\n            name=\"tool_placeholder\",\n            display_name=\"Tool Placeholder\",\n            info=\"Placeholder for the tool\",\n            value=\"\",\n            show=False,\n            tool_mode=False,\n        ),\n    ]\n\n    outputs = [\n        Output(display_name=\"Response\", name=\"response\", method=\"build_output\"),\n    ]\n\n    async def _validate_schema_inputs(self, tool_obj) -> list[InputTypes]:\n        \"\"\"Validate and process schema inputs for a tool.\"\"\"\n        try:\n            if not tool_obj or not hasattr(tool_obj, \"args_schema\"):\n                msg = \"Invalid tool object or missing input schema\"\n                raise ValueError(msg)\n\n            flat_schema = flatten_schema(tool_obj.args_schema.schema())\n            input_schema = create_input_schema_from_json_schema(flat_schema)\n            if not input_schema:\n                msg = f\"Empty input schema for tool '{tool_obj.name}'\"\n                raise ValueError(msg)\n\n            schema_inputs = schema_to_langflow_inputs(input_schema)\n            if not schema_inputs:\n                msg = f\"No input parameters defined for tool '{tool_obj.name}'\"\n                await logger.awarning(msg)\n                return []\n\n        except Exception as e:\n            msg = f\"Error validating schema inputs: {e!s}\"\n            await logger.aexception(msg)\n            raise ValueError(msg) from e\n        else:\n            return schema_inputs\n\n    async def update_tool_list(self, 
mcp_server_value=None):\n        # Accepts mcp_server_value as dict {name, config} or uses self.mcp_server\n        mcp_server = mcp_server_value if mcp_server_value is not None else getattr(self, \"mcp_server\", None)\n        server_name = None\n        server_config_from_value = None\n        if isinstance(mcp_server, dict):\n            server_name = mcp_server.get(\"name\")\n            server_config_from_value = mcp_server.get(\"config\")\n        else:\n            server_name = mcp_server\n        if not server_name:\n            self.tools = []\n            return [], {\"name\": server_name, \"config\": server_config_from_value}\n\n        # Check if caching is enabled, default to False\n        use_cache = getattr(self, \"use_cache\", False)\n\n        # Use shared cache if available and caching is enabled\n        cached = None\n        if use_cache:\n            servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n            cached = servers_cache.get(server_name) if isinstance(servers_cache, dict) else None\n\n        if cached is not None:\n            try:\n                self.tools = cached[\"tools\"]\n                self.tool_names = cached[\"tool_names\"]\n                self._tool_cache = cached[\"tool_cache\"]\n                server_config_from_value = cached[\"config\"]\n            except (TypeError, KeyError, AttributeError) as e:\n                # Handle corrupted cache data by clearing it and continuing to fetch fresh tools\n                msg = f\"Unable to use cached data for MCP Server{server_name}: {e}\"\n                await logger.awarning(msg)\n                # Clear the corrupted cache entry\n                current_servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n                if isinstance(current_servers_cache, dict) and server_name in current_servers_cache:\n                    current_servers_cache.pop(server_name)\n                    
safe_cache_set(self._shared_component_cache, \"servers\", current_servers_cache)\n            else:\n                return self.tools, {\"name\": server_name, \"config\": server_config_from_value}\n\n        try:\n            try:\n                from langflow.api.v2.mcp import get_server\n                from langflow.services.database.models.user.crud import get_user_by_id\n            except ImportError as e:\n                msg = (\n                    \"Langflow MCP server functionality is not available. \"\n                    \"This feature requires the full Langflow installation.\"\n                )\n                raise ImportError(msg) from e\n            async with session_scope() as db:\n                if not self.user_id:\n                    msg = \"User ID is required for fetching MCP tools.\"\n                    raise ValueError(msg)\n                current_user = await get_user_by_id(db, self.user_id)\n\n                # Try to get server config from DB/API\n                server_config = await get_server(\n                    server_name,\n                    current_user,\n                    db,\n                    storage_service=get_storage_service(),\n                    settings_service=get_settings_service(),\n                )\n\n            # If get_server returns empty but we have a config, use it\n            if not server_config and server_config_from_value:\n                server_config = server_config_from_value\n\n            if not server_config:\n                self.tools = []\n                return [], {\"name\": server_name, \"config\": server_config}\n\n            _, tool_list, tool_cache = await update_tools(\n                server_name=server_name,\n                server_config=server_config,\n                mcp_stdio_client=self.stdio_client,\n                mcp_sse_client=self.sse_client,\n            )\n\n            self.tool_names = [tool.name for tool in tool_list if hasattr(tool, \"name\")]\n         
   self._tool_cache = tool_cache\n            self.tools = tool_list\n\n            # Cache the result only if caching is enabled\n            if use_cache:\n                cache_data = {\n                    \"tools\": tool_list,\n                    \"tool_names\": self.tool_names,\n                    \"tool_cache\": tool_cache,\n                    \"config\": server_config,\n                }\n\n                # Safely update the servers cache\n                current_servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n                if isinstance(current_servers_cache, dict):\n                    current_servers_cache[server_name] = cache_data\n                    safe_cache_set(self._shared_component_cache, \"servers\", current_servers_cache)\n\n        except (TimeoutError, asyncio.TimeoutError) as e:\n            msg = f\"Timeout updating tool list: {e!s}\"\n            await logger.aexception(msg)\n            raise TimeoutError(msg) from e\n        except Exception as e:\n            msg = f\"Error updating tool list: {e!s}\"\n            await logger.aexception(msg)\n            raise ValueError(msg) from e\n        else:\n            return tool_list, {\"name\": server_name, \"config\": server_config}\n\n    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n        \"\"\"Toggle the visibility of connection-specific fields based on the selected mode.\"\"\"\n        try:\n            if field_name == \"tool\":\n                try:\n                    if len(self.tools) == 0:\n                        try:\n                            self.tools, build_config[\"mcp_server\"][\"value\"] = await self.update_tool_list()\n                            build_config[\"tool\"][\"options\"] = [tool.name for tool in self.tools]\n                            build_config[\"tool\"][\"placeholder\"] = \"Select a tool\"\n                        except (TimeoutError, 
asyncio.TimeoutError) as e:\n                            msg = f\"Timeout updating tool list: {e!s}\"\n                            await logger.aexception(msg)\n                            if not build_config[\"tools_metadata\"][\"show\"]:\n                                build_config[\"tool\"][\"show\"] = True\n                                build_config[\"tool\"][\"options\"] = []\n                                build_config[\"tool\"][\"value\"] = \"\"\n                                build_config[\"tool\"][\"placeholder\"] = \"Timeout on MCP server\"\n                            else:\n                                build_config[\"tool\"][\"show\"] = False\n                        except ValueError:\n                            if not build_config[\"tools_metadata\"][\"show\"]:\n                                build_config[\"tool\"][\"show\"] = True\n                                build_config[\"tool\"][\"options\"] = []\n                                build_config[\"tool\"][\"value\"] = \"\"\n                                build_config[\"tool\"][\"placeholder\"] = \"Error on MCP Server\"\n                            else:\n                                build_config[\"tool\"][\"show\"] = False\n\n                    if field_value == \"\":\n                        return build_config\n                    tool_obj = None\n                    for tool in self.tools:\n                        if tool.name == field_value:\n                            tool_obj = tool\n                            break\n                    if tool_obj is None:\n                        msg = f\"Tool {field_value} not found in available tools: {self.tools}\"\n                        await logger.awarning(msg)\n                        return build_config\n                    await self._update_tool_config(build_config, field_value)\n                except Exception as e:\n                    build_config[\"tool\"][\"options\"] = []\n                    msg = f\"Failed to update 
tools: {e!s}\"\n                    raise ValueError(msg) from e\n                else:\n                    return build_config\n            elif field_name == \"mcp_server\":\n                if not field_value:\n                    build_config[\"tool\"][\"show\"] = False\n                    build_config[\"tool\"][\"options\"] = []\n                    build_config[\"tool\"][\"value\"] = \"\"\n                    build_config[\"tool\"][\"placeholder\"] = \"\"\n                    build_config[\"tool_placeholder\"][\"tool_mode\"] = False\n                    self.remove_non_default_keys(build_config)\n                    return build_config\n\n                build_config[\"tool_placeholder\"][\"tool_mode\"] = True\n\n                current_server_name = field_value.get(\"name\") if isinstance(field_value, dict) else field_value\n                _last_selected_server = safe_cache_get(self._shared_component_cache, \"last_selected_server\", \"\")\n\n                # To avoid unnecessary updates, only proceed if the server has actually changed\n                if (_last_selected_server in (current_server_name, \"\")) and build_config[\"tool\"][\"show\"]:\n                    if current_server_name:\n                        servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n                        if isinstance(servers_cache, dict):\n                            cached = servers_cache.get(current_server_name)\n                            if cached is not None and cached.get(\"tool_names\"):\n                                cached_tools = cached[\"tool_names\"]\n                                current_tools = build_config[\"tool\"][\"options\"]\n                                if current_tools == cached_tools:\n                                    return build_config\n                    else:\n                        return build_config\n\n                # Determine if \"Tool Mode\" is active by checking if the tool dropdown is hidden.\n 
               is_in_tool_mode = build_config[\"tools_metadata\"][\"show\"]\n                safe_cache_set(self._shared_component_cache, \"last_selected_server\", current_server_name)\n\n                # Check if tools are already cached for this server before clearing\n                cached_tools = None\n                if current_server_name:\n                    use_cache = getattr(self, \"use_cache\", True)\n                    if use_cache:\n                        servers_cache = safe_cache_get(self._shared_component_cache, \"servers\", {})\n                        if isinstance(servers_cache, dict):\n                            cached = servers_cache.get(current_server_name)\n                            if cached is not None:\n                                try:\n                                    cached_tools = cached[\"tools\"]\n                                    self.tools = cached_tools\n                                    self.tool_names = cached[\"tool_names\"]\n                                    self._tool_cache = cached[\"tool_cache\"]\n                                except (TypeError, KeyError, AttributeError) as e:\n                                    # Handle corrupted cache data by ignoring it\n                                    msg = f\"Unable to use cached data for MCP Server,{current_server_name}: {e}\"\n                                    await logger.awarning(msg)\n                                    cached_tools = None\n\n                # Only clear tools if we don't have cached tools for the current server\n                if not cached_tools:\n                    self.tools = []  # Clear previous tools only if no cache\n\n                self.remove_non_default_keys(build_config)  # Clear previous tool inputs\n\n                # Only show the tool dropdown if not in tool_mode\n                if not is_in_tool_mode:\n                    build_config[\"tool\"][\"show\"] = True\n                    if cached_tools:\n              
          # Use cached tools to populate options immediately\n                        build_config[\"tool\"][\"options\"] = [tool.name for tool in cached_tools]\n                        build_config[\"tool\"][\"placeholder\"] = \"Select a tool\"\n                    else:\n                        # Show loading state only when we need to fetch tools\n                        build_config[\"tool\"][\"placeholder\"] = \"Loading tools...\"\n                        build_config[\"tool\"][\"options\"] = []\n                    build_config[\"tool\"][\"value\"] = uuid.uuid4()\n                else:\n                    # Keep the tool dropdown hidden if in tool_mode\n                    self._not_load_actions = True\n                    build_config[\"tool\"][\"show\"] = False\n\n            elif field_name == \"tool_mode\":\n                build_config[\"tool\"][\"placeholder\"] = \"\"\n                build_config[\"tool\"][\"show\"] = not bool(field_value) and bool(build_config[\"mcp_server\"])\n                self.remove_non_default_keys(build_config)\n                self.tool = build_config[\"tool\"][\"value\"]\n                if field_value:\n                    self._not_load_actions = True\n                else:\n                    build_config[\"tool\"][\"value\"] = uuid.uuid4()\n                    build_config[\"tool\"][\"options\"] = []\n                    build_config[\"tool\"][\"show\"] = True\n                    build_config[\"tool\"][\"placeholder\"] = \"Loading tools...\"\n            elif field_name == \"tools_metadata\":\n                self._not_load_actions = False\n\n        except Exception as e:\n            msg = f\"Error in update_build_config: {e!s}\"\n            await logger.aexception(msg)\n            raise ValueError(msg) from e\n        else:\n            return build_config\n\n    def get_inputs_for_all_tools(self, tools: list) -> dict:\n        \"\"\"Get input schemas for all tools.\"\"\"\n        inputs = {}\n        for tool in 
tools:\n            if not tool or not hasattr(tool, \"name\"):\n                continue\n            try:\n                flat_schema = flatten_schema(tool.args_schema.schema())\n                input_schema = create_input_schema_from_json_schema(flat_schema)\n                langflow_inputs = schema_to_langflow_inputs(input_schema)\n                inputs[tool.name] = langflow_inputs\n            except (AttributeError, ValueError, TypeError, KeyError) as e:\n                msg = f\"Error getting inputs for tool {getattr(tool, 'name', 'unknown')}: {e!s}\"\n                logger.exception(msg)\n                continue\n        return inputs\n\n    def remove_input_schema_from_build_config(\n        self, build_config: dict, tool_name: str, input_schema: dict[list[InputTypes], Any]\n    ):\n        \"\"\"Remove the input schema for the tool from the build config.\"\"\"\n        # Keep only schemas that don't belong to the current tool\n        input_schema = {k: v for k, v in input_schema.items() if k != tool_name}\n        # Remove all inputs from other tools\n        for value in input_schema.values():\n            for _input in value:\n                if _input.name in build_config:\n                    build_config.pop(_input.name)\n\n    def remove_non_default_keys(self, build_config: dict) -> None:\n        \"\"\"Remove non-default keys from the build config.\"\"\"\n        for key in list(build_config.keys()):\n            if key not in self.default_keys:\n                build_config.pop(key)\n\n    async def _update_tool_config(self, build_config: dict, tool_name: str) -> None:\n        \"\"\"Update tool configuration with proper error handling.\"\"\"\n        if not self.tools:\n            self.tools, build_config[\"mcp_server\"][\"value\"] = await self.update_tool_list()\n\n        if not tool_name:\n            return\n\n        tool_obj = next((tool for tool in self.tools if tool.name == tool_name), None)\n        if not tool_obj:\n            
msg = f\"Tool {tool_name} not found in available tools: {self.tools}\"\n            self.remove_non_default_keys(build_config)\n            build_config[\"tool\"][\"value\"] = \"\"\n            await logger.awarning(msg)\n            return\n\n        try:\n            # Store current values before removing inputs\n            current_values = {}\n            for key, value in build_config.items():\n                if key not in self.default_keys and isinstance(value, dict) and \"value\" in value:\n                    current_values[key] = value[\"value\"]\n\n            # Get all tool inputs and remove old ones\n            input_schema_for_all_tools = self.get_inputs_for_all_tools(self.tools)\n            self.remove_input_schema_from_build_config(build_config, tool_name, input_schema_for_all_tools)\n\n            # Get and validate new inputs\n            self.schema_inputs = await self._validate_schema_inputs(tool_obj)\n            if not self.schema_inputs:\n                msg = f\"No input parameters to configure for tool '{tool_name}'\"\n                await logger.ainfo(msg)\n                return\n\n            # Add new inputs to build config\n            for schema_input in self.schema_inputs:\n                if not schema_input or not hasattr(schema_input, \"name\"):\n                    msg = \"Invalid schema input detected, skipping\"\n                    await logger.awarning(msg)\n                    continue\n\n                try:\n                    name = schema_input.name\n                    input_dict = schema_input.to_dict()\n                    input_dict.setdefault(\"value\", None)\n                    input_dict.setdefault(\"required\", True)\n\n                    build_config[name] = input_dict\n\n                    # Preserve existing value if the parameter name exists in current_values\n                    if name in current_values:\n                        build_config[name][\"value\"] = current_values[name]\n\n                
except (AttributeError, KeyError, TypeError) as e:\n                    msg = f\"Error processing schema input {schema_input}: {e!s}\"\n                    await logger.aexception(msg)\n                    continue\n        except ValueError as e:\n            msg = f\"Schema validation error for tool {tool_name}: {e!s}\"\n            await logger.aexception(msg)\n            self.schema_inputs = []\n            return\n        except (AttributeError, KeyError, TypeError) as e:\n            msg = f\"Error updating tool config: {e!s}\"\n            await logger.aexception(msg)\n            raise ValueError(msg) from e\n\n    async def build_output(self) -> DataFrame:\n        \"\"\"Build output with improved error handling and validation.\"\"\"\n        try:\n            self.tools, _ = await self.update_tool_list()\n            if self.tool != \"\":\n                # Set session context for persistent MCP sessions using Langflow session ID\n                session_context = self._get_session_context()\n                if session_context:\n                    self.stdio_client.set_session_context(session_context)\n                    self.sse_client.set_session_context(session_context)\n\n                exec_tool = self._tool_cache[self.tool]\n                tool_args = self.get_inputs_for_all_tools(self.tools)[self.tool]\n                kwargs = {}\n                for arg in tool_args:\n                    value = getattr(self, arg.name, None)\n                    if value is not None:\n                        if isinstance(value, Message):\n                            kwargs[arg.name] = value.text\n                        else:\n                            kwargs[arg.name] = value\n\n                unflattened_kwargs = maybe_unflatten_dict(kwargs)\n\n                output = await exec_tool.coroutine(**unflattened_kwargs)\n\n                tool_content = []\n                for item in output.content:\n                    item_dict = item.model_dump()\n     
               tool_content.append(item_dict)\n                return DataFrame(data=tool_content)\n            return DataFrame(data=[{\"error\": \"You must select a tool\"}])\n        except Exception as e:\n            msg = f\"Error in build_output: {e!s}\"\n            await logger.aexception(msg)\n            raise ValueError(msg) from e\n\n    def _get_session_context(self) -> str | None:\n        \"\"\"Get the Langflow session ID for MCP session caching.\"\"\"\n        # Try to get session ID from the component's execution context\n        if hasattr(self, \"graph\") and hasattr(self.graph, \"session_id\"):\n            session_id = self.graph.session_id\n            # Include server name to ensure different servers get different sessions\n            server_name = \"\"\n            mcp_server = getattr(self, \"mcp_server\", None)\n            if isinstance(mcp_server, dict):\n                server_name = mcp_server.get(\"name\", \"\")\n            elif mcp_server:\n                server_name = str(mcp_server)\n            return f\"{session_id}_{server_name}\" if session_id else None\n        return None\n\n    async def _get_tools(self):\n        \"\"\"Get cached tools or update if necessary.\"\"\"\n        mcp_server = getattr(self, \"mcp_server\", None)\n        if not self._not_load_actions:\n            tools, _ = await self.update_tool_list(mcp_server)\n            return tools\n        return []\n"
+              },
+              "mcp_server": {
+                "_input_type": "McpInput",
+                "advanced": false,
+                "display_name": "MCP Server",
+                "dynamic": false,
+                "info": "Select the MCP Server that will be used by this component",
+                "name": "mcp_server",
+                "placeholder": "",
+                "real_time_refresh": true,
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "trace_as_metadata": true,
+                "type": "mcp",
+                "value": {
+                  "config": {
+                    "args": [
+                      "mcp-proxy",
+                      "--headers",
+                      "x-api-key",
+                      "sk-lq7nQIiX4jbYTIOGH7YG9z46E0IW1i-FSvn_hkcg2xE",
+                      "http://localhost:7860/api/v1/mcp/project/304fb921-38e4-4763-b223-832a3e3546e0/sse"
+                    ],
+                    "command": "uvx"
+                  },
+                  "name": "lf-starter_project"
+                }
+              },
+              "tool": {
+                "_input_type": "DropdownInput",
+                "advanced": false,
+                "combobox": false,
+                "dialog_inputs": {},
+                "display_name": "Tool",
+                "dynamic": false,
+                "external_options": {},
+                "info": "Select the tool to execute",
+                "name": "tool",
+                "options": [
+                  "opensearch_url_ingestion_flow"
+                ],
+                "options_metadata": [],
+                "placeholder": "",
+                "real_time_refresh": true,
+                "required": true,
+                "show": false,
+                "title_case": false,
+                "toggle": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "tool_placeholder": {
+                "_input_type": "MessageTextInput",
+                "advanced": false,
+                "display_name": "Tool Placeholder",
+                "dynamic": false,
+                "info": "Placeholder for the tool",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "tool_placeholder",
+                "placeholder": "",
+                "required": false,
+                "show": false,
+                "title_case": false,
+                "tool_mode": true,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "tools_metadata": {
+                "_input_type": "ToolsInput",
+                "advanced": false,
+                "display_name": "Actions",
+                "dynamic": false,
+                "info": "Modify tool names and descriptions to help agents understand when to use each tool.",
+                "is_list": true,
+                "list_add_label": "Add More",
+                "name": "tools_metadata",
+                "placeholder": "",
+                "real_time_refresh": true,
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "tools",
+                "value": [
+                  {
+                    "args": {
+                      "input_value": {
+                        "anyOf": [
+                          {
+                            "type": "string"
+                          },
+                          {
+                            "type": "null"
+                          }
+                        ],
+                        "default": null,
+                        "description": "Message to be passed as input.",
+                        "title": "Input Value"
+                      }
+                    },
+                    "description": "This flow is to ingest the URL to open search.",
+                    "display_description": "This flow is to ingest the URL to open search.",
+                    "display_name": "opensearch_url_ingestion_flow",
+                    "name": "opensearch_url_ingestion_flow",
+                    "readonly": false,
+                    "status": true,
+                    "tags": [
+                      "opensearch_url_ingestion_flow"
+                    ]
+                  }
+                ]
+              },
+              "use_cache": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Use Cached Server",
+                "dynamic": false,
+                "info": "Enable caching of MCP Server and tools to improve performance. Disable to always fetch fresh tools and server updates.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "use_cache",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": false
+              }
+            },
+            "tool_mode": true
+          },
+          "showNode": true,
+          "type": "MCP"
+        },
+        "id": "MCP-7EY21",
+        "measured": {
+          "height": 284,
+          "width": 320
+        },
+        "position": {
+          "x": 675.7137923419156,
+          "y": 878.6218422334763
+        },
+        "selected": false,
+        "type": "genericNode"
       }
     ],
     "viewport": {
-      "x": -149.48015964664273,
+      "x": -237.0727605845459,
       "y": 154.6885920024542,
       "zoom": 0.602433700773958
     }
@@ -2563,8 +2836,8 @@
   "endpoint_name": null,
   "id": "1098eea1-6649-4e1d-aed1-b77249fb8dd0",
   "is_component": false,
-  "last_tested_version": "1.6.3.dev0",
-  "name": "OpenRAG OpenSearch Agent",
+  "last_tested_version": "1.6.0",
+  "name": "OpenRAG Open Search Agent",
   "tags": [
     "assistants",
     "agents"
diff --git a/flows/openrag_url_mcp.json b/flows/openrag_url_mcp.json
index 69dbc85d..9cab0fed 100644
--- a/flows/openrag_url_mcp.json
+++ b/flows/openrag_url_mcp.json
@@ -232,6 +232,7 @@
       },
       {
         "animated": false,
+        "className": "",
         "data": {
           "sourceHandle": {
             "dataType": "EmbeddingModel",
@@ -733,6 +734,10 @@
                   {
                     "key": "owner_email",
                     "value": "OWNER_EMAIL"
+                  },
+                  {
+                    "key": "connector_type",
+                    "value": "CONNECTOR_TYPE_URL"
                   }
                 ]
               },
@@ -1808,7 +1813,7 @@
             ],
             "frozen": false,
             "icon": "table",
-            "last_updated": "2025-10-03T20:31:36.023Z",
+            "last_updated": "2025-10-06T17:46:55.068Z",
             "legacy": false,
             "lf_version": "1.6.0",
             "metadata": {
@@ -2224,7 +2229,7 @@
             ],
             "frozen": false,
             "icon": "table",
-            "last_updated": "2025-10-03T20:31:36.025Z",
+            "last_updated": "2025-10-06T17:46:55.069Z",
             "legacy": false,
             "lf_version": "1.6.0",
             "metadata": {
@@ -2897,7 +2902,7 @@
             ],
             "frozen": false,
             "icon": "table",
-            "last_updated": "2025-10-03T20:31:36.026Z",
+            "last_updated": "2025-10-06T17:46:55.069Z",
             "legacy": false,
             "metadata": {
               "code_hash": "b4d6b19b6eef",
@@ -3310,7 +3315,7 @@
             ],
             "frozen": false,
             "icon": "binary",
-            "last_updated": "2025-10-03T20:31:47.177Z",
+            "last_updated": "2025-10-06T17:46:54.996Z",
             "legacy": false,
             "metadata": {
               "code_hash": "8607e963fdef",
@@ -3595,17 +3600,17 @@
       }
     ],
     "viewport": {
-      "x": -407.1633937626607,
-      "y": -577.5291936220412,
-      "zoom": 0.5347553210574026
+      "x": -538.2311610019549,
+      "y": -337.3313239657308,
+      "zoom": 0.45546556043892106
     }
   },
   "description": "This flow is to ingest the URL to open search.",
   "endpoint_name": null,
-  "mcp_enabled": true,
   "id": "72c3d17c-2dac-4a73-b48a-6518473d7830",
+  "mcp_enabled": true,
   "is_component": false,
-  "last_tested_version": "1.6.0",
+  "last_tested_version": "1.6.3.dev1",
   "name": "OpenSearch URL Ingestion Flow",
   "tags": [
     "openai",
diff --git a/frontend/components/knowledge-search-input.tsx b/frontend/components/knowledge-search-input.tsx
index fd840628..57899a16 100644
--- a/frontend/components/knowledge-search-input.tsx
+++ b/frontend/components/knowledge-search-input.tsx
@@ -74,7 +74,7 @@ export const KnowledgeSearchInput = () => {
         {queryOverride && (
           <Button
             variant="ghost"
-            className="h-full !px-1.5 !py-0"
+            className="h-full rounded-sm !px-1.5 !py-0"
             type="button"
             onClick={() => {
               setSearchQueryInput("");
@@ -87,7 +87,7 @@ export const KnowledgeSearchInput = () => {
         <Button
           variant="ghost"
           className={cn(
-            "h-full !px-1.5 !py-0 hidden group-focus-within/input:block",
+            "h-full rounded-sm !px-1.5 !py-0 hidden group-focus-within/input:block",
             searchQueryInput && "block"
           )}
           type="submit"
diff --git a/frontend/src/app/admin/page.tsx b/frontend/src/app/admin/page.tsx
index 6cb8aa96..a318d511 100644
--- a/frontend/src/app/admin/page.tsx
+++ b/frontend/src/app/admin/page.tsx
@@ -326,4 +326,4 @@ export default function ProtectedAdminPage() {
       <AdminPage />
     </ProtectedRoute>
   )
-}
\ No newline at end of file
+}
diff --git a/frontend/src/app/connectors/page.tsx b/frontend/src/app/connectors/page.tsx
index ad70ec90..06aa0265 100644
--- a/frontend/src/app/connectors/page.tsx
+++ b/frontend/src/app/connectors/page.tsx
@@ -92,6 +92,7 @@ export default function ConnectorsPage() {
           selectedFiles={selectedFiles}
           isAuthenticated={false} // This would come from auth context in real usage
           accessToken={undefined} // This would come from connected account
+          isIngesting={isSyncing}
         />
       </div>
 
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 080120cc..189eef7c 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -5,14 +5,9 @@ import { useRouter, useSearchParams } from "next/navigation";
 import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
 // import { Label } from "@/components/ui/label";
 // import { Checkbox } from "@/components/ui/checkbox";
-import { filterAccentClasses } from "@/components/knowledge-filter-panel";
 import { ProtectedRoute } from "@/components/protected-route";
 import { Button } from "@/components/ui/button";
-import { Checkbox } from "@/components/ui/checkbox";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
 import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
-import { useTask } from "@/contexts/task-context";
 import {
   type ChunkResult,
   type File,
@@ -35,9 +30,9 @@ function ChunksPageContent() {
   const { parsedFilterData, queryOverride } = useKnowledgeFilter();
   const filename = searchParams.get("filename");
   const [chunks, setChunks] = useState<ChunkResult[]>([]);
-  const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
-    ChunkResult[]
-  >([]);
+  // const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
+  //   ChunkResult[]
+  // >([]);
   // const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
   const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
     number | null
@@ -83,13 +78,13 @@ function ChunksPageContent() {
   }, [data, filename]);
 
   // Set selected state for all checkboxes when selectAll changes
-  useEffect(() => {
-    if (selectAll) {
-      setSelectedChunks(new Set(chunks.map((_, index) => index)));
-    } else {
-      setSelectedChunks(new Set());
-    }
-  }, [selectAll, setSelectedChunks, chunks]);
+  // useEffect(() => {
+  //   if (selectAll) {
+  //     setSelectedChunks(new Set(chunks.map((_, index) => index)));
+  //   } else {
+  //     setSelectedChunks(new Set());
+  //   }
+  // }, [selectAll, setSelectedChunks, chunks]);
 
   const handleBack = useCallback(() => {
     router.push("/knowledge");
@@ -126,26 +121,25 @@ function ChunksPageContent() {
 
   return (
     <div className="flex flex-col h-full">
-      <div className="flex flex-col h-full">
-        {/* Header */}
-        <div className="flex flex-col mb-6">
-          <div className="flex items-center gap-3 mb-6">
-            <Button
-              variant="ghost"
-              onClick={handleBack}
-              size="sm"
-              className="max-w-8 max-h-8 -m-2"
-            >
-              <ArrowLeft size={24} />
-            </Button>
-            <h1 className="text-lg font-semibold">
-              {/* Removes file extension from filename */}
-              {filename.replace(/\.[^/.]+$/, "")}
-            </h1>
-          </div>
-          <div className="flex flex-1">
-            <KnowledgeSearchInput />
-            {/* <div className="flex items-center pl-4 gap-2">
+      {/* Header */}
+      <div className="flex flex-col mb-6">
+        <div className="flex items-center gap-3 mb-6">
+          <Button
+            variant="ghost"
+            onClick={handleBack}
+            size="sm"
+            className="max-w-8 max-h-8 -m-2"
+          >
+            <ArrowLeft size={24} />
+          </Button>
+          <h1 className="text-lg font-semibold">
+            {/* Removes file extension from filename */}
+            {filename.replace(/\.[^/.]+$/, "")}
+          </h1>
+        </div>
+        <div className="flex flex-1">
+          <KnowledgeSearchInput />
+          {/* <div className="flex items-center pl-4 gap-2">
               <Checkbox
                 id="selectAllChunks"
                 checked={selectAll}
@@ -160,11 +154,12 @@ function ChunksPageContent() {
                 Select all
               </Label>
             </div> */}
-          </div>
         </div>
+      </div>
 
-        {/* Content Area - matches knowledge page structure */}
-        <div className="flex-1 overflow-auto pr-6">
+      <div className="grid gap-6 grid-cols-1 lg:grid-cols-[3fr_1fr]">
+        {/* Content Area */}
+        <div className="row-start-2 lg:row-start-1">
           {isFetching ? (
             <div className="flex items-center justify-center h-64">
               <div className="text-center">
@@ -185,7 +180,7 @@ function ChunksPageContent() {
             </div>
           ) : (
             <div className="space-y-4 pb-6">
-              {chunksFilteredByQuery.map((chunk, index) => (
+              {chunks.map((chunk, index) => (
                 <div
                   key={chunk.filename + index}
                   className="bg-muted rounded-lg p-4 border border-border/50"
@@ -242,31 +237,30 @@ function ChunksPageContent() {
             </div>
           )}
         </div>
-      </div>
-      {/* Right panel - Summary (TODO), Technical details,  */}
-      {chunks.length > 0 && (
-        <div className="w-[320px] py-20 px-2">
-          <div className="mb-8">
-            <h2 className="text-xl font-semibold mt-3 mb-4">
-              Technical details
-            </h2>
-            <dl>
-              <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
-                <dt className="text-sm/6 text-muted-foreground">
-                  Total chunks
-                </dt>
-                <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
-                  {chunks.length}
-                </dd>
-              </div>
-              <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
-                <dt className="text-sm/6 text-muted-foreground">Avg length</dt>
-                <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
-                  {averageChunkLength.toFixed(0)} chars
-                </dd>
-              </div>
-              {/* TODO: Uncomment after data is available */}
-              {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+        {/* Right panel - Summary (TODO), Technical details,  */}
+        {chunks.length > 0 && (
+          <div className="min-w-[200px]">
+            <div className="mb-8">
+              <h2 className="text-xl font-semibold mb-4">Technical details</h2>
+              <dl>
+                <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+                  <dt className="text-sm/6 text-muted-foreground">
+                    Total chunks
+                  </dt>
+                  <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                    {chunks.length}
+                  </dd>
+                </div>
+                <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+                  <dt className="text-sm/6 text-muted-foreground">
+                    Avg length
+                  </dt>
+                  <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                    {averageChunkLength.toFixed(0)} chars
+                  </dd>
+                </div>
+                {/* TODO: Uncomment after data is available */}
+                {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Process time</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               </dd>
@@ -276,54 +270,55 @@ function ChunksPageContent() {
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
               </dd>
             </div> */}
-            </dl>
-          </div>
-          <div className="mb-8">
-            <h2 className="text-xl font-semibold mt-2 mb-3">
-              Original document
-            </h2>
-            <dl>
-              {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              </dl>
+            </div>
+            <div className="mb-4">
+              <h2 className="text-xl font-semibold mt-2 mb-3">
+                Original document
+              </h2>
+              <dl>
+                {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Name</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
                 {fileData?.filename}
               </dd>
             </div> */}
-              <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
-                <dt className="text-sm/6 text-muted-foreground">Type</dt>
-                <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
-                  {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
-                </dd>
-              </div>
-              <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
-                <dt className="text-sm/6 text-muted-foreground">Size</dt>
-                <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
-                  {fileData?.size
-                    ? `${Math.round(fileData.size / 1024)} KB`
-                    : "Unknown"}
-                </dd>
-              </div>
-              {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+                <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+                  <dt className="text-sm/6 text-muted-foreground">Type</dt>
+                  <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                    {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
+                  </dd>
+                </div>
+                <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+                  <dt className="text-sm/6 text-muted-foreground">Size</dt>
+                  <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                    {fileData?.size
+                      ? `${Math.round(fileData.size / 1024)} KB`
+                      : "Unknown"}
+                  </dd>
+                </div>
+                {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
                 N/A
               </dd>
             </div> */}
-              {/* TODO: Uncomment after data is available */}
-              {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+                {/* TODO: Uncomment after data is available */}
+                {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Source</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
             </div> */}
-              {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+                {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Updated</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
                 N/A
               </dd>
             </div> */}
-            </dl>
+              </dl>
+            </div>
           </div>
-        </div>
-      )}
+        )}
+      </div>
     </div>
   );
 }
diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx
index 6fe74c4c..148da3bd 100644
--- a/frontend/src/app/settings/page.tsx
+++ b/frontend/src/app/settings/page.tsx
@@ -85,6 +85,7 @@ interface Connector {
   connectionId?: string;
   access_token?: string;
   selectedFiles?: GoogleDriveFile[] | OneDriveFile[];
+  available?: boolean;
 }
 
 interface SyncResult {
diff --git a/frontend/src/app/upload/[provider]/page.tsx b/frontend/src/app/upload/[provider]/page.tsx
index 10b9b0e5..b144106d 100644
--- a/frontend/src/app/upload/[provider]/page.tsx
+++ b/frontend/src/app/upload/[provider]/page.tsx
@@ -165,7 +165,7 @@ export default function UploadProviderPage() {
 
   const handleFileSelected = (files: CloudFile[]) => {
     setSelectedFiles(files);
-    console.log(`Selected ${files.length} files from ${provider}:`, files);
+    console.log(`Selected ${files.length} item(s) from ${provider}:`, files);
     // You can add additional handling here like triggering sync, etc.
   };
 
@@ -376,19 +376,19 @@ export default function UploadProviderPage() {
                 loading={isIngesting}
                 disabled={!hasSelectedFiles || isIngesting}
               >
-                {!hasSelectedFiles ? (
-                  <>Ingest files</>
-                ) : (
+                {hasSelectedFiles ? (
                   <>
-                    Ingest {selectedFiles.length} file
+                    Ingest {selectedFiles.length} item
                     {selectedFiles.length > 1 ? "s" : ""}
                   </>
+                ) : (
+                  <>Ingest selected items</>
                 )}
               </Button>
             </TooltipTrigger>
             {!hasSelectedFiles ? (
               <TooltipContent side="left">
-                Select at least one file before ingesting
+                Select at least one item before ingesting
               </TooltipContent>
             ) : null}
           </Tooltip>
diff --git a/frontend/src/components/cloud-connectors-dialog.tsx b/frontend/src/components/cloud-connectors-dialog.tsx
index d38cf44f..ee7dfbbe 100644
--- a/frontend/src/components/cloud-connectors-dialog.tsx
+++ b/frontend/src/components/cloud-connectors-dialog.tsx
@@ -201,7 +201,7 @@ export function CloudConnectorsDialog({
         <DialogHeader>
           <DialogTitle>Cloud File Connectors</DialogTitle>
           <DialogDescription>
-            Select files from your connected cloud storage providers
+            Select files or folders from your connected cloud storage providers
           </DialogDescription>
         </DialogHeader>
 
@@ -232,7 +232,7 @@ export function CloudConnectorsDialog({
                         !connector.hasAccessToken
                           ? connector.accessTokenError ||
                             "Access token required - try reconnecting your account"
-                          : `Select files from ${connector.name}`
+                          : `Select files or folders from ${connector.name}`
                       }
                       onClick={e => {
                         e.preventDefault();
@@ -283,6 +283,7 @@ export function CloudConnectorsDialog({
                         accessToken={connectorAccessTokens[connector.type]}
                         onPickerStateChange={() => {}}
                         clientId={connector.clientId}
+                        isIngesting={false}
                       />
                     </div>
                   );
diff --git a/frontend/src/components/cloud-picker/file-list.tsx b/frontend/src/components/cloud-picker/file-list.tsx
index 7033fcf8..8cf2b728 100644
--- a/frontend/src/components/cloud-picker/file-list.tsx
+++ b/frontend/src/components/cloud-picker/file-list.tsx
@@ -26,7 +26,7 @@ export const FileList = ({
   return (
     <div className="space-y-2 relative">
       <div className="flex items-center justify-between">
-        <p className="text-sm font-medium">Added files ({files.length})</p>
+        <p className="text-sm font-medium">Selected items ({files.length})</p>
         <Button
           ignoreTitleCase={true}
           onClick={onClearAll}
diff --git a/frontend/src/components/cloud-picker/picker-header.tsx b/frontend/src/components/cloud-picker/picker-header.tsx
index e0d9cfa4..d907174e 100644
--- a/frontend/src/components/cloud-picker/picker-header.tsx
+++ b/frontend/src/components/cloud-picker/picker-header.tsx
@@ -39,7 +39,7 @@ export const PickerHeader = ({
     return (
       <div className="text-sm text-muted-foreground p-4 bg-muted/20 rounded-md">
         Please connect to {getProviderName(provider)} first to select specific
-        files.
+        files or folders.
       </div>
     );
   }
@@ -48,7 +48,7 @@ export const PickerHeader = ({
     <Card>
       <CardContent className="flex flex-col items-center text-center py-8">
         <p className="text-sm text-primary mb-4">
-          Select files from {getProviderName(provider)} to ingest.
+          Select files or folders from {getProviderName(provider)} to ingest.
         </p>
         <Button
           onClick={onAddFiles}
@@ -56,7 +56,7 @@ export const PickerHeader = ({
           className="bg-foreground text-background hover:bg-foreground/90 font-semibold"
         >
           <Plus className="h-4 w-4" />
-          {isPickerOpen ? "Opening picker..." : "Add files"}
+          {isPickerOpen ? "Opening picker..." : "Add files or folders"}
         </Button>
       </CardContent>
     </Card>
diff --git a/frontend/src/components/cloud-picker/provider-handlers.ts b/frontend/src/components/cloud-picker/provider-handlers.ts
index 4a39312f..5b0a8258 100644
--- a/frontend/src/components/cloud-picker/provider-handlers.ts
+++ b/frontend/src/components/cloud-picker/provider-handlers.ts
@@ -52,12 +52,16 @@ export class GoogleDriveHandler {
     try {
       this.onPickerStateChange?.(true);
 
+      // Create a docs view that also lists folders and lets them be selected
+      const docsView = new window.google.picker.DocsView()
+        .setIncludeFolders(true)
+        .setSelectFolderEnabled(true);
+
       const picker = new window.google.picker.PickerBuilder()
-        .addView(window.google.picker.ViewId.DOCS)
-        .addView(window.google.picker.ViewId.FOLDERS)
+        .addView(docsView)
         .setOAuthToken(this.accessToken)
         .enableFeature(window.google.picker.Feature.MULTISELECT_ENABLED)
-        .setTitle("Select files from Google Drive")
+        .setTitle("Select files or folders from Google Drive")
         .setCallback(data => this.pickerCallback(data, onFileSelected))
         .build();
 
diff --git a/frontend/src/components/cloud-picker/types.ts b/frontend/src/components/cloud-picker/types.ts
index 20b1eae0..85ce83a9 100644
--- a/frontend/src/components/cloud-picker/types.ts
+++ b/frontend/src/components/cloud-picker/types.ts
@@ -53,6 +53,7 @@ declare global {
           load: (callback: () => void) => void;
         };
         PickerBuilder: new () => GooglePickerBuilder;
+        DocsView: new () => GoogleDocsView;
         ViewId: {
           DOCS: string;
           FOLDERS: string;
@@ -83,8 +84,13 @@ declare global {
   }
 }
 
+export interface GoogleDocsView {
+  setIncludeFolders: (include: boolean) => GoogleDocsView;
+  setSelectFolderEnabled: (enabled: boolean) => GoogleDocsView;
+}
+
 export interface GooglePickerBuilder {
-  addView: (view: string) => GooglePickerBuilder;
+  addView: (view: GoogleDocsView | string) => GooglePickerBuilder;
   setOAuthToken: (token: string) => GooglePickerBuilder;
   setCallback: (
     callback: (data: GooglePickerData) => void
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index 12ad3c24..9b3d9908 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -19,6 +19,7 @@ import {
 import { useAuth } from "@/contexts/auth-context";
 
 // Task interface is now imported from useGetTasksQuery
+export type { Task };
 
 export interface TaskFile {
   filename: string;
diff --git a/pyproject.toml b/pyproject.toml
index be8d359c..00f8409f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,10 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
 [project]
 name = "openrag"
-version = "0.1.14.dev3"
+version = "0.1.19"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
@@ -31,6 +35,9 @@ dependencies = [
     "docling-serve>=1.4.1",
 ]
 
+[dependency-groups]
+dev = ["pytest>=8", "pytest-asyncio>=0.21.0", "pytest-mock>=3.12.0", "pytest-cov>=4.0.0"]
+
 [project.scripts]
 openrag = "tui.main:run_tui"
 
diff --git a/scripts/docling_ctl.py b/scripts/docling_ctl.py
new file mode 100644
index 00000000..8dc5c879
--- /dev/null
+++ b/scripts/docling_ctl.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""Helper script to control docling-serve using DoclingManager for CI/testing."""
+
+import sys
+import asyncio
+import argparse
+from pathlib import Path
+
+# Add src to path so we can import DoclingManager
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+from tui.managers.docling_manager import DoclingManager
+
+
+async def start_docling(port: int = 5001, host: str | None = None, enable_ui: bool = False):
+    """Start docling-serve unless it is already running; return a process exit code (0 ok, 1 failure)."""
+    manager = DoclingManager()
+
+    if manager.is_running():
+        print("Docling-serve is already running")
+        status = manager.get_status()
+        print(f"Endpoint: {status['endpoint']}")
+        return 0
+
+    host_msg = f"{host}:{port}" if host else f"auto-detected host:{port}"
+    print(f"Starting docling-serve on {host_msg}...")
+    success, message = await manager.start(port=port, host=host, enable_ui=enable_ui)
+
+    if success:
+        print(message)
+        status = manager.get_status()
+        print(f"Endpoint: {status['endpoint']}")
+        print(f"PID: {status['pid']}")
+        return 0
+    else:
+        print(message, file=sys.stderr)  # failure text goes to stderr
+        return 1
+
+
+async def stop_docling():
+    """Stop docling-serve if it is running; return a process exit code (0 ok, 1 failure)."""
+    manager = DoclingManager()
+
+    if not manager.is_running():
+        print("Docling-serve is not running")
+        return 0
+
+    print("Stopping docling-serve...")
+    success, message = await manager.stop()
+
+    # Failure text goes to stderr; success text goes to stdout.
+    if not success:
+        print(message, file=sys.stderr)
+        return 1
+    print(message)
+    return 0
+
+
+async def status_docling():
+    """Print docling-serve status; return 0 if it is running, 1 otherwise."""
+    manager = DoclingManager()
+    status = manager.get_status()
+
+    print(f"Status: {status['status']}")
+    if status['status'] == 'running':
+        print(f"Endpoint: {status['endpoint']}")
+        print(f"Docs: {status['docs_url']}")
+        print(f"PID: {status['pid']}")
+
+    return 0 if status['status'] == 'running' else 1
+
+
+async def main():
+    """Parse the CLI arguments and run the requested command; return its exit code."""
+    parser = argparse.ArgumentParser(description="Control docling-serve for CI/testing")
+    parser.add_argument("command", choices=["start", "stop", "status"], help="Command to run")
+    parser.add_argument("--port", type=int, default=5001, help="Port to run on (default: 5001)")
+    parser.add_argument("--host", default=None, help="Host to bind to (default: auto-detect for containers)")
+    parser.add_argument("--enable-ui", action="store_true", help="Enable UI")
+
+    args = parser.parse_args()
+
+    if args.command == "start":
+        return await start_docling(port=args.port, host=args.host or None, enable_ui=args.enable_ui)
+    if args.command == "stop":
+        return await stop_docling()
+    return await status_docling()  # "status" is the only other choice argparse accepts
+
+
+if __name__ == "__main__":
+    sys.exit(asyncio.run(main()))
diff --git a/src/api/docling.py b/src/api/docling.py
new file mode 100644
index 00000000..22b709ef
--- /dev/null
+++ b/src/api/docling.py
@@ -0,0 +1,120 @@
+"""Docling service proxy endpoints."""
+
+import socket
+import struct
+from pathlib import Path
+
+import httpx
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+from utils.container_utils import (
+    detect_container_environment,
+    get_container_host,
+    guess_host_ip_for_containers,
+)
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+
+def _get_gateway_ip_from_route() -> str | None:
+    """Return the default gateway IP from /proc/net/route, or None if it cannot be determined."""
+    min_fields = 3  # interface, destination, gateway
+    try:
+        with Path("/proc/net/route").open() as route_table:
+            next(route_table, None)  # Skip header; an empty table must not raise StopIteration
+            for line in route_table:
+                fields = line.strip().split()
+                # A destination of 00000000 marks the default route.
+                if len(fields) >= min_fields and fields[1] == "00000000":
+                    # "<L": pack the hex word little-endian to get dotted-quad byte order.
+                    gw_int = int(fields[2], 16)
+                    return socket.inet_ntoa(struct.pack("<L", gw_int))
+    except (OSError, IndexError, ValueError) as err:
+        logger.warning("Could not read routing table: %s", err)
+
+    return None
+
+
+def determine_docling_host() -> str:
+    """Determine the host (hostname or IP) used for docling health checks."""
+    container_type = detect_container_environment()
+    if container_type:
+        # Try HOST_DOCKER_INTERNAL env var first
+        container_host = get_container_host()
+        if container_host:
+            logger.info("Using container-aware host '%s'", container_host)
+            return container_host
+
+        # Try special hostnames (Docker Desktop and rootless podman)
+        special_hosts = ("host.docker.internal", "host.containers.internal")
+        for hostname in special_hosts:
+            try:
+                socket.getaddrinfo(hostname, None)
+                logger.info("Using %s for container-to-host communication", hostname)
+                return hostname
+            except socket.gaierror:
+                logger.debug("%s not available", hostname)
+
+        # Try gateway IP detection (Docker on Linux)
+        gateway_ip = _get_gateway_ip_from_route()
+        if gateway_ip:
+            logger.info("Detected host gateway IP: %s", gateway_ip)
+            return gateway_ip
+
+        # Fallback to bridge IP
+        fallback_ip = guess_host_ip_for_containers(logger=logger)
+        logger.info("Falling back to container bridge host %s", fallback_ip)
+        return fallback_ip
+
+    # Running outside a container
+    logger.info("Running outside a container; using localhost")
+    return "localhost"
+
+
+# Resolve the docling host (hostname or IP) once, when this module is imported
+HOST_IP = determine_docling_host()
+DOCLING_SERVICE_URL = f"http://{HOST_IP}:5001"
+
+
+async def health(request: Request) -> JSONResponse:
+    """
+    Proxy a health check to docling-serve; respond 503 when it is unreachable or unhealthy.
+    This allows the frontend to check docling status via same-origin request.
+    """
+    health_url = f"{DOCLING_SERVICE_URL}/health"
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                health_url,
+                timeout=2.0
+            )
+
+            if response.status_code == 200:
+                return JSONResponse({
+                    "status": "healthy",
+                    "host": HOST_IP
+                })
+            else:
+                logger.warning("Docling health check failed", url=health_url, status_code=response.status_code)
+                return JSONResponse({
+                    "status": "unhealthy",
+                    "message": f"Health check failed with status: {response.status_code}",
+                    "host": HOST_IP
+                }, status_code=503)
+
+    except httpx.TimeoutException:
+        logger.warning("Docling health check timeout", url=health_url)
+        return JSONResponse({
+            "status": "unhealthy",
+            "message": "Connection timeout",
+            "host": HOST_IP
+        }, status_code=503)
+    except Exception as e:
+        logger.error("Docling health check failed", url=health_url, error=str(e))
+        return JSONResponse({
+            "status": "unhealthy",
+            "message": str(e),
+            "host": HOST_IP
+        }, status_code=503)
diff --git a/src/auth_middleware.py b/src/auth_middleware.py
index 44d1b2f0..1bc6cf04 100644
--- a/src/auth_middleware.py
+++ b/src/auth_middleware.py
@@ -28,7 +28,6 @@ def require_auth(session_manager):
         async def wrapper(request: Request):
             # In no-auth mode, bypass authentication entirely
             if is_no_auth_mode():
-                logger.debug("No-auth mode: Creating anonymous user")
                 # Create an anonymous user object so endpoints don't break
                 from session_manager import User
                 from datetime import datetime
@@ -36,7 +35,6 @@ def require_auth(session_manager):
                 from session_manager import AnonymousUser
                 request.state.user = AnonymousUser()
                 request.state.jwt_token = None  # No JWT in no-auth mode
-                logger.debug("Set user_id=anonymous, jwt_token=None")
                 return await handler(request)
 
             user = get_current_user(request, session_manager)
diff --git a/src/config/settings.py b/src/config/settings.py
index 6f55520d..598ccfb2 100644
--- a/src/config/settings.py
+++ b/src/config/settings.py
@@ -13,8 +13,8 @@ from utils.container_utils import get_container_host
 from utils.document_processing import create_document_converter
 from utils.logging_config import get_logger
 
-load_dotenv()
-load_dotenv("../")
+load_dotenv(override=False)
+load_dotenv("../", override=False)
 
 logger = get_logger(__name__)
 
@@ -61,12 +61,6 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
 def is_no_auth_mode():
     """Check if we're running in no-auth mode (OAuth credentials missing)"""
     result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET)
-    logger.debug(
-        "Checking auth mode",
-        no_auth_mode=result,
-        has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None,
-        has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None,
-    )
     return result
 
 
diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py
index 48a445bf..66b67519 100644
--- a/src/connectors/google_drive/connector.py
+++ b/src/connectors/google_drive/connector.py
@@ -1,21 +1,20 @@
 import io
 import os
-from pathlib import Path
 import time
 from collections import deque
 from dataclasses import dataclass
-from typing import Dict, List, Any, Optional, Iterable, Set
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional, Set
 
 from googleapiclient.errors import HttpError
 from googleapiclient.http import MediaIoBaseDownload
+
 from utils.logging_config import get_logger
 
-logger = get_logger(__name__)
-
-# Project-specific base types (adjust imports to your project)
 from ..base import BaseConnector, ConnectorDocument, DocumentACL
 from .oauth import GoogleDriveOAuth
 
+logger = get_logger(__name__)
 
 # -------------------------
 # Config model
@@ -32,8 +31,8 @@ class GoogleDriveConfig:
     recursive: bool = True
 
     # Shared Drives control
-    drive_id: Optional[str] = None        # when set, we use corpora='drive'
-    corpora: Optional[str] = None         # 'user' | 'drive' | 'domain'; auto-picked if None
+    drive_id: Optional[str] = None  # when set, we use corpora='drive'
+    corpora: Optional[str] = None  # 'user' | 'drive' | 'domain'; auto-picked if None
 
     # Optional filtering
     include_mime_types: Optional[List[str]] = None
@@ -80,7 +79,6 @@ class GoogleDriveConnector(BaseConnector):
     _FILE_ID_ALIASES = ("file_ids", "selected_file_ids", "selected_files")
     _FOLDER_ID_ALIASES = ("folder_ids", "selected_folder_ids", "selected_folders")
 
-
     def emit(self, doc: ConnectorDocument) -> None:
         """
         Emit a ConnectorDocument instance.
@@ -100,7 +98,9 @@ class GoogleDriveConnector(BaseConnector):
 
         # Token file default (so callback & workers don’t need to pass it)
         project_root = Path(__file__).resolve().parent.parent.parent.parent
-        token_file = config.get("token_file") or str(project_root / "google_drive_token.json")
+        token_file = config.get("token_file") or str(
+            project_root / "google_drive_token.json"
+        )
         Path(token_file).parent.mkdir(parents=True, exist_ok=True)
 
         if not isinstance(client_id, str) or not client_id.strip():
@@ -115,7 +115,9 @@ class GoogleDriveConnector(BaseConnector):
             )
 
         # Normalize incoming IDs from any of the supported alias keys
-        def _first_present_list(cfg: Dict[str, Any], keys: Iterable[str]) -> Optional[List[str]]:
+        def _first_present_list(
+            cfg: Dict[str, Any], keys: Iterable[str]
+        ) -> Optional[List[str]]:
             for k in keys:
                 v = cfg.get(k)
                 if v:  # accept non-empty list
@@ -151,6 +153,7 @@ class GoogleDriveConnector(BaseConnector):
 
         # Drive client is built in authenticate()
         from google.oauth2.credentials import Credentials
+
         self.creds: Optional[Credentials] = None
         self.service: Any = None
 
@@ -214,7 +217,7 @@ class GoogleDriveConnector(BaseConnector):
                         "id, name, mimeType, modifiedTime, createdTime, size, "
                         "webViewLink, parents, owners, driveId"
                     ),
-                    **self._drives_flags,
+                    **self._drives_get_flags,
                 )
                 .execute()
             )
@@ -285,7 +288,9 @@ class GoogleDriveConnector(BaseConnector):
         Fetch metadata for a file by ID (resolving shortcuts).
         """
         if self.service is None:
-            raise RuntimeError("Google Drive service is not initialized. Please authenticate first.")
+            raise RuntimeError(
+                "Google Drive service is not initialized. Please authenticate first."
+            )
         try:
             meta = (
                 self.service.files()
@@ -323,24 +328,40 @@ class GoogleDriveConnector(BaseConnector):
     def _iter_selected_items(self) -> List[Dict[str, Any]]:
         """
         Return a de-duplicated list of file metadata for the selected scope:
-          - explicit file_ids
+          - explicit file_ids (automatically expands folders to their contents)
           - items inside folder_ids (with optional recursion)
         Shortcuts are resolved to their targets automatically.
         """
         seen: Set[str] = set()
         items: List[Dict[str, Any]] = []
+        folders_to_expand: List[str] = []
 
-        # Explicit files
+        # Process file_ids: separate actual files from folders
         if self.cfg.file_ids:
             for fid in self.cfg.file_ids:
                 meta = self._get_file_meta_by_id(fid)
-                if meta and meta["id"] not in seen:
+                if not meta:
+                    continue
+
+                # If it's a folder, add to folders_to_expand instead
+                if meta.get("mimeType") == "application/vnd.google-apps.folder":
+                    logger.debug(
+                        f"Item {fid} ({meta.get('name')}) is a folder, "
+                        f"will expand to contents"
+                    )
+                    folders_to_expand.append(fid)
+                elif meta["id"] not in seen:
+                    # It's a regular file, add it directly
                     seen.add(meta["id"])
                     items.append(meta)
 
-        # Folders
+        # Collect all folders to expand (from both file_ids and folder_ids)
         if self.cfg.folder_ids:
-            folder_children = self._bfs_expand_folders(self.cfg.folder_ids)
+            folders_to_expand.extend(self.cfg.folder_ids)
+
+        # Expand all folders to their contents
+        if folders_to_expand:
+            folder_children = self._bfs_expand_folders(folders_to_expand)
             for meta in folder_children:
                 meta = self._resolve_shortcut(meta)
                 if meta.get("id") in seen:
@@ -357,7 +378,11 @@ class GoogleDriveConnector(BaseConnector):
 
         items = self._filter_by_mime(items)
         # Exclude folders from final emits:
-        items = [m for m in items if m.get("mimeType") != "application/vnd.google-apps.folder"]
+        items = [
+            m
+            for m in items
+            if m.get("mimeType") != "application/vnd.google-apps.folder"
+        ]
         return items
 
     # -------------------------
@@ -389,29 +414,85 @@ class GoogleDriveConnector(BaseConnector):
     def _download_file_bytes(self, file_meta: Dict[str, Any]) -> bytes:
         """
         Download bytes for a given file (exporting if Google-native).
+        Raises ValueError if the item is a folder (folders cannot be downloaded).
         """
         file_id = file_meta["id"]
+        file_name = file_meta.get("name", "unknown")
         mime_type = file_meta.get("mimeType") or ""
 
-        # Google-native: export
-        export_mime = self._pick_export_mime(mime_type)
-        if mime_type.startswith("application/vnd.google-apps."):
-            # default fallback if not overridden
-            #if not export_mime:
-            #    export_mime = "application/pdf"
-            export_mime = "application/pdf"
+        logger.debug(
+            f"Downloading file {file_id} ({file_name}) with mimetype: {mime_type}"
+        )
+
+        # Folders cannot be downloaded or exported - this should never be reached
+        # as folders are automatically expanded in _iter_selected_items()
+        if mime_type == "application/vnd.google-apps.folder":
+            raise ValueError(
+                f"Cannot download folder {file_id} ({file_name}). "
+                f"This is a bug - folders should be automatically expanded before download."
+            )
+
+        # According to https://stackoverflow.com/questions/65053558/google-drive-api-v3-files-export-method-throws-a-403-error-export-only-support
+        # export_media ONLY works for Google Docs Editors files (Docs, Sheets, Slides, Drawings)
+        # All other files (including other Google Apps types like Forms, Sites, Maps) must use get_media
+
+        # Define which Google Workspace files are exportable
+        exportable_types = {
+            "application/vnd.google-apps.document",  # Google Docs
+            "application/vnd.google-apps.spreadsheet",  # Google Sheets
+            "application/vnd.google-apps.presentation",  # Google Slides
+            "application/vnd.google-apps.drawing",  # Google Drawings
+        }
+
+        if mime_type in exportable_types:
+            # This is an exportable Google Workspace file - must use export_media
+            export_mime = self._pick_export_mime(mime_type)
+            if not export_mime:
+                # No export MIME was picked for this exportable type; fall back to PDF
+                export_mime = "application/pdf"
+
+            logger.debug(
+                f"Using export_media for {file_id} ({mime_type} -> {export_mime})"
+            )
             # NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
-            request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
+            request = self.service.files().export_media(
+                fileId=file_id, mimeType=export_mime
+            )
         else:
+            # This is a regular uploaded file (PDF, image, video, etc.) - use get_media
+            # Also handles non-exportable Google Apps files (Forms, Sites, Maps, etc.)
+            logger.debug(f"Using get_media for {file_id} ({mime_type})")
             # Binary download (get_media also doesn't accept the Drive flags)
             request = self.service.files().get_media(fileId=file_id)
 
+        # Download the file with error handling for misclassified Google Docs
         fh = io.BytesIO()
         downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
         done = False
-        while not done:
-            status, done = downloader.next_chunk()
-            # Optional: you can log progress via status.progress()
+
+        try:
+            while not done:
+                status, done = downloader.next_chunk()
+                # Optional: you can log progress via status.progress()
+        except HttpError as e:
+            # If download fails with "fileNotDownloadable", it's a Docs Editor file
+            # that wasn't properly detected. Retry with export_media.
+            if "fileNotDownloadable" in str(e) and mime_type not in exportable_types:
+                logger.warning(
+                    f"Download failed for {file_id} ({mime_type}) with fileNotDownloadable error. "
+                    f"Retrying with export_media (file might be a Google Doc)"
+                )
+                export_mime = "application/pdf"
+                request = self.service.files().export_media(
+                    fileId=file_id, mimeType=export_mime
+                )
+                fh = io.BytesIO()
+                downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
+                done = False
+                while not done:
+                    status, done = downloader.next_chunk()
+            else:
+                raise
 
         return fh.getvalue()
 
@@ -430,7 +511,9 @@ class GoogleDriveConnector(BaseConnector):
 
             # If still not authenticated, bail (caller should kick off OAuth init)
             if not await self.oauth.is_authenticated():
-                logger.debug("authenticate: no valid credentials; run OAuth init/callback first.")
+                logger.debug(
+                    "authenticate: no valid credentials; run OAuth init/callback first."
+                )
                 return False
 
             # Build Drive service from OAuth helper
@@ -450,7 +533,7 @@ class GoogleDriveConnector(BaseConnector):
         self,
         page_token: Optional[str] = None,
         max_files: Optional[int] = None,
-        **kwargs
+        **kwargs,
     ) -> Dict[str, Any]:
         """
         List files in the currently selected scope (file_ids/folder_ids/recursive).
@@ -483,15 +566,24 @@ class GoogleDriveConnector(BaseConnector):
             except Exception:
                 pass
             return {"files": [], "next_page_token": None}
-        
+
     async def get_file_content(self, file_id: str) -> ConnectorDocument:
         """
         Fetch a file's metadata and content from Google Drive and wrap it in a ConnectorDocument.
+        Raises FileNotFoundError if the ID is a folder (folders cannot be downloaded).
         """
         meta = self._get_file_meta_by_id(file_id)
         if not meta:
             raise FileNotFoundError(f"Google Drive file not found: {file_id}")
 
+        # Check if this is a folder - folders cannot be downloaded
+        if meta.get("mimeType") == "application/vnd.google-apps.folder":
+            raise FileNotFoundError(
+                f"Cannot download folder {file_id} ({meta.get('name')}). "
+                f"Folders must be expanded to list their contents. "
+                f"This ID should not have been passed to get_file_content()."
+            )
+
         try:
             blob = self._download_file_bytes(meta)
         except Exception as e:
@@ -527,11 +619,13 @@ class GoogleDriveConnector(BaseConnector):
             metadata={
                 "parents": meta.get("parents"),
                 "driveId": meta.get("driveId"),
-                "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
+                "size": int(meta.get("size", 0))
+                if str(meta.get("size", "")).isdigit()
+                else None,
             },
         )
         return doc
-    
+
     async def setup_subscription(self) -> str:
         """
         Start a Google Drive Changes API watch (webhook).
@@ -546,10 +640,14 @@ class GoogleDriveConnector(BaseConnector):
         # 1) Ensure we are authenticated and have a live Drive service
         ok = await self.authenticate()
         if not ok:
-            raise RuntimeError("GoogleDriveConnector.setup_subscription: not authenticated")
+            raise RuntimeError(
+                "GoogleDriveConnector.setup_subscription: not authenticated"
+            )
 
         # 2) Resolve webhook address (no param in ABC, so pull from config/env)
-        webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv("GOOGLE_DRIVE_WEBHOOK_URL")
+        webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv(
+            "GOOGLE_DRIVE_WEBHOOK_URL"
+        )
         if not webhook_address:
             raise RuntimeError(
                 "GoogleDriveConnector.setup_subscription: webhook URL not configured. "
@@ -600,7 +698,9 @@ class GoogleDriveConnector(BaseConnector):
             }
 
             if not isinstance(channel_id, str) or not channel_id:
-                raise RuntimeError(f"Drive watch returned invalid channel id: {channel_id!r}")
+                raise RuntimeError(
+                    f"Drive watch returned invalid channel id: {channel_id!r}"
+                )
 
             return channel_id
 
@@ -665,13 +765,20 @@ class GoogleDriveConnector(BaseConnector):
             return False
 
         try:
-            self.service.channels().stop(body={"id": subscription_id, "resourceId": resource_id}).execute()
+            self.service.channels().stop(
+                body={"id": subscription_id, "resourceId": resource_id}
+            ).execute()
 
             # 4) Clear local bookkeeping
-            if getattr(self, "_active_channel", None) and self._active_channel.get("channel_id") == subscription_id:
+            if (
+                getattr(self, "_active_channel", None)
+                and self._active_channel.get("channel_id") == subscription_id
+            ):
                 self._active_channel = {}
 
-            if hasattr(self, "_subscriptions") and isinstance(self._subscriptions, dict):
+            if hasattr(self, "_subscriptions") and isinstance(
+                self._subscriptions, dict
+            ):
                 self._subscriptions.pop(subscription_id, None)
 
             return True
@@ -682,7 +789,7 @@ class GoogleDriveConnector(BaseConnector):
             except Exception:
                 pass
             return False
-        
+
     async def handle_webhook(self, payload: Dict[str, Any]) -> List[str]:
         """
         Process a Google Drive Changes webhook.
@@ -722,7 +829,9 @@ class GoogleDriveConnector(BaseConnector):
             except Exception as e:
                 selected_ids = set()
                 try:
-                    logger.error(f"handle_webhook: scope build failed, proceeding unfiltered: {e}")
+                    logger.error(
+                        f"handle_webhook: scope build failed, proceeding unfiltered: {e}"
+                    )
                 except Exception:
                     pass
 
@@ -759,7 +868,11 @@ class GoogleDriveConnector(BaseConnector):
                     # Filter to our selected scope if we have one; otherwise accept all
                     if selected_ids and (rid not in selected_ids):
                         # Shortcut target might be in scope even if the shortcut isn't
-                        tgt = fobj.get("shortcutDetails", {}).get("targetId") if fobj else None
+                        tgt = (
+                            fobj.get("shortcutDetails", {}).get("targetId")
+                            if fobj
+                            else None
+                        )
                         if not (tgt and tgt in selected_ids):
                             continue
 
@@ -808,7 +921,9 @@ class GoogleDriveConnector(BaseConnector):
                 blob = self._download_file_bytes(meta)
             except HttpError as e:
                 # Skip/record failures
-                logger.error(f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}")
+                logger.error(
+                    f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}"
+                )
                 continue
 
             from datetime import datetime
@@ -838,7 +953,9 @@ class GoogleDriveConnector(BaseConnector):
                     "webViewLink": meta.get("webViewLink"),
                     "parents": meta.get("parents"),
                     "driveId": meta.get("driveId"),
-                    "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
+                    "size": int(meta.get("size", 0))
+                    if str(meta.get("size", "")).isdigit()
+                    else None,
                 },
                 content=blob,
             )
@@ -849,7 +966,9 @@ class GoogleDriveConnector(BaseConnector):
     # -------------------------
     def get_start_page_token(self) -> str:
         # getStartPageToken accepts supportsAllDrives (not includeItemsFromAllDrives)
-        resp = self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
+        resp = (
+            self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
+        )
         return resp["startPageToken"]
 
     def poll_changes_and_sync(self) -> Optional[str]:
@@ -888,7 +1007,10 @@ class GoogleDriveConnector(BaseConnector):
                 # Match scope
                 if fid not in selected_ids:
                     # also consider shortcut target
-                    if file_obj.get("mimeType") == "application/vnd.google-apps.shortcut":
+                    if (
+                        file_obj.get("mimeType")
+                        == "application/vnd.google-apps.shortcut"
+                    ):
                         tgt = file_obj.get("shortcutDetails", {}).get("targetId")
                         if tgt and tgt in selected_ids:
                             pass
@@ -923,7 +1045,10 @@ class GoogleDriveConnector(BaseConnector):
                     modified_time=parse_datetime(resolved.get("modifiedTime")),
                     mimetype=str(resolved.get("mimeType", "")),
                     acl=DocumentACL(),  # Set appropriate ACL if needed
-                    metadata={"parents": resolved.get("parents"), "driveId": resolved.get("driveId")},
+                    metadata={
+                        "parents": resolved.get("parents"),
+                        "driveId": resolved.get("driveId"),
+                    },
                     content=blob,
                 )
                 self.emit(doc)
@@ -945,7 +1070,9 @@ class GoogleDriveConnector(BaseConnector):
     # -------------------------
     # Optional: webhook stubs
     # -------------------------
-    def build_watch_body(self, webhook_address: str, channel_id: Optional[str] = None) -> Dict[str, Any]:
+    def build_watch_body(
+        self, webhook_address: str, channel_id: Optional[str] = None
+    ) -> Dict[str, Any]:
         """
         Prepare the request body for changes.watch if you use webhooks.
         """
@@ -964,7 +1091,7 @@ class GoogleDriveConnector(BaseConnector):
         body = self.build_watch_body(webhook_address)
         result = (
             self.service.changes()
-            .watch(pageToken=page_token, body=body, **self._drives_flags)
+            .watch(pageToken=page_token, body=body, **self._drives_get_flags)
             .execute()
         )
         return result
@@ -974,7 +1101,9 @@ class GoogleDriveConnector(BaseConnector):
         Stop a previously started webhook watch.
         """
         try:
-            self.service.channels().stop(body={"id": channel_id, "resourceId": resource_id}).execute()
+            self.service.channels().stop(
+                body={"id": channel_id, "resourceId": resource_id}
+            ).execute()
             return True
 
         except HttpError as e:
diff --git a/src/connectors/langflow_connector_service.py b/src/connectors/langflow_connector_service.py
index d1a62c4b..b33994e5 100644
--- a/src/connectors/langflow_connector_service.py
+++ b/src/connectors/langflow_connector_service.py
@@ -1,5 +1,3 @@
-import os
-import tempfile
 from typing import Any, Dict, List, Optional
 
 # Create custom processor for connector files using Langflow
@@ -60,14 +58,14 @@ class LangflowConnectorService:
         # Create temporary file from document content
         with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
             # Write document content to temp file
-            with open(tmp_path, 'wb') as f:
+            with open(tmp_path, "wb") as f:
                 f.write(document.content)
 
             # Step 1: Upload file to Langflow
             logger.debug("Uploading file to Langflow", filename=document.filename)
             content = document.content
             file_tuple = (
-                document.filename.replace(" ", "_").replace("/", "_")+suffix,
+                document.filename.replace(" ", "_").replace("/", "_") + suffix,
                 content,
                 document.mimetype or "application/octet-stream",
             )
@@ -256,7 +254,10 @@ class LangflowConnectorService:
         file_ids: List[str],
         jwt_token: str = None,
     ) -> str:
-        """Sync specific files by their IDs using Langflow processing"""
+        """
+        Sync specific files by their IDs using Langflow processing.
+        Automatically expands folders to their contents.
+        """
         if not self.task_service:
             raise ValueError(
                 "TaskService not available - connector sync requires task service dependency"
@@ -279,10 +280,50 @@ class LangflowConnectorService:
         owner_name = user.name if user else None
         owner_email = user.email if user else None
 
+        # Temporarily set file_ids in the connector's config so list_files() can use them
+        # Store the original values to restore later
+        cfg = getattr(connector, "cfg", None)
+        original_file_ids = None
+        original_folder_ids = None
+
+        if cfg is not None:
+            original_file_ids = getattr(cfg, "file_ids", None)
+            original_folder_ids = getattr(cfg, "folder_ids", None)
+
+        try:
+            # Set the file_ids we want to sync in the connector's config
+            if cfg is not None:
+                cfg.file_ids = file_ids  # type: ignore
+                cfg.folder_ids = None  # type: ignore
+
+            # Get the expanded list of file IDs (folders will be expanded to their contents)
+            # This uses the connector's list_files() which calls _iter_selected_items()
+            result = await connector.list_files()
+            expanded_file_ids = [f["id"] for f in result.get("files", [])]
+
+            if not expanded_file_ids:
+                logger.warning(
+                    f"No files found after expanding file_ids. "
+                    f"Original IDs: {file_ids}. This may indicate all IDs were folders "
+                    f"with no contents, or files that were filtered out."
+                )
+                # NOTE: caught by the `except Exception` below, which falls back to file_ids
+                raise ValueError("No files to sync after expanding folders")
+
+        except Exception as e:
+            logger.error(f"Failed to expand file_ids via list_files(): {e}")
+            # Fallback to original file_ids if expansion fails
+            expanded_file_ids = file_ids
+        finally:
+            # Restore original config values
+            if cfg is not None:
+                cfg.file_ids = original_file_ids  # type: ignore
+                cfg.folder_ids = original_folder_ids  # type: ignore
+
         processor = LangflowConnectorFileProcessor(
             self,
             connection_id,
-            file_ids,
+            expanded_file_ids,
             user_id,
             jwt_token=jwt_token,
             owner_name=owner_name,
@@ -291,7 +332,7 @@ class LangflowConnectorService:
 
         # Create custom task using TaskService
         task_id = await self.task_service.create_custom_task(
-            user_id, file_ids, processor
+            user_id, expanded_file_ids, processor
         )
 
         return task_id
diff --git a/src/connectors/service.py b/src/connectors/service.py
index 792d8d1f..278743d3 100644
--- a/src/connectors/service.py
+++ b/src/connectors/service.py
@@ -1,16 +1,11 @@
-import tempfile
-import os
-from typing import Dict, Any, List, Optional
+from typing import Any, Dict, List, Optional
 
-from .base import BaseConnector, ConnectorDocument
 from utils.logging_config import get_logger
 
-logger = get_logger(__name__)
-from .google_drive import GoogleDriveConnector
-from .sharepoint import SharePointConnector
-from .onedrive import OneDriveConnector
+from .base import BaseConnector, ConnectorDocument
 from .connection_manager import ConnectionManager
 
+
 logger = get_logger(__name__)
 
 
@@ -56,9 +51,11 @@ class ConnectorService:
         # Create temporary file from document content
         from utils.file_utils import auto_cleanup_tempfile
 
-        with auto_cleanup_tempfile(suffix=self._get_file_extension(document.mimetype)) as tmp_path:
+        with auto_cleanup_tempfile(
+            suffix=self._get_file_extension(document.mimetype)
+        ) as tmp_path:
             # Write document content to temp file
-            with open(tmp_path, 'wb') as f:
+            with open(tmp_path, "wb") as f:
                 f.write(document.content)
 
             # Use existing process_file_common function with connector document metadata
@@ -71,6 +68,7 @@ class ConnectorService:
 
             # Process using consolidated processing pipeline
             from models.processors import TaskProcessor
+
             processor = TaskProcessor(document_service=doc_service)
             result = await processor.process_document_standard(
                 file_path=tmp_path,
@@ -301,7 +299,10 @@ class ConnectorService:
         file_ids: List[str],
         jwt_token: str = None,
     ) -> str:
-        """Sync specific files by their IDs (used for webhook-triggered syncs)"""
+        """
+        Sync specific files by their IDs (used for webhook-triggered syncs or manual selection).
+        Automatically expands folders to their contents.
+        """
         if not self.task_service:
             raise ValueError(
                 "TaskService not available - connector sync requires task service dependency"
@@ -324,14 +325,53 @@ class ConnectorService:
         owner_name = user.name if user else None
         owner_email = user.email if user else None
 
+        # Temporarily set file_ids in the connector's config so list_files() can use them
+        # Store the original values to restore later
+        original_file_ids = None
+        original_folder_ids = None
+
+        if hasattr(connector, "cfg"):
+            original_file_ids = getattr(connector.cfg, "file_ids", None)
+            original_folder_ids = getattr(connector.cfg, "folder_ids", None)
+
+        try:
+            # Set the file_ids we want to sync in the connector's config
+            if hasattr(connector, "cfg"):
+                connector.cfg.file_ids = file_ids  # type: ignore
+                connector.cfg.folder_ids = None  # type: ignore
+
+            # Get the expanded list of file IDs (folders will be expanded to their contents)
+            # This uses the connector's list_files() which calls _iter_selected_items()
+            result = await connector.list_files()
+            expanded_file_ids = [f["id"] for f in result.get("files", [])]
+
+            if not expanded_file_ids:
+                logger.warning(
+                    f"No files found after expanding file_ids. "
+                    f"Original IDs: {file_ids}. This may indicate all IDs were folders "
+                    f"with no contents, or files that were filtered out."
+                )
+                # NOTE: caught by the `except Exception` below, which falls back to file_ids
+                raise ValueError("No files to sync after expanding folders")
+
+        except Exception as e:
+            logger.error(f"Failed to expand file_ids via list_files(): {e}")
+            # Fallback to original file_ids if expansion fails
+            expanded_file_ids = file_ids
+        finally:
+            # Restore original config values
+            if hasattr(connector, "cfg"):
+                connector.cfg.file_ids = original_file_ids  # type: ignore
+                connector.cfg.folder_ids = original_folder_ids  # type: ignore
+
         # Create custom processor for specific connector files
         from models.processors import ConnectorFileProcessor
 
-        # We'll pass file_ids as the files_info, the processor will handle ID-only files
+        # Use expanded_file_ids which has folders already expanded
         processor = ConnectorFileProcessor(
             self,
             connection_id,
-            file_ids,
+            expanded_file_ids,
             user_id,
             jwt_token=jwt_token,
             owner_name=owner_name,
@@ -340,7 +380,7 @@ class ConnectorService:
 
         # Create custom task using TaskService
         task_id = await self.task_service.create_custom_task(
-            user_id, file_ids, processor
+            user_id, expanded_file_ids, processor
         )
 
         return task_id
diff --git a/src/main.py b/src/main.py
index a09d2488..1094f8b5 100644
--- a/src/main.py
+++ b/src/main.py
@@ -131,7 +131,7 @@ async def configure_alerting_security():
         # Don't fail startup if alerting config fails
 
 
-async def _ensure_opensearch_index(self):
+async def _ensure_opensearch_index():
     """Ensure OpenSearch index exists when using traditional connector service."""
     try:
         # Check if index already exists
@@ -242,6 +242,9 @@ def generate_jwt_keys():
                 capture_output=True,
             )
 
+            # Set restrictive permissions on private key (readable by owner only)
+            os.chmod(private_key_path, 0o600)
+
             # Generate public key
             subprocess.run(
                 [
@@ -257,12 +260,21 @@ def generate_jwt_keys():
                 capture_output=True,
             )
 
+            # Set permissions on public key (readable by all)
+            os.chmod(public_key_path, 0o644)
+
             logger.info("Generated RSA keys for JWT signing")
         except subprocess.CalledProcessError as e:
             logger.error("Failed to generate RSA keys", error=str(e))
             raise
     else:
-        logger.info("RSA keys already exist, skipping generation")
+        # Ensure correct permissions on existing keys
+        try:
+            os.chmod(private_key_path, 0o600)
+            os.chmod(public_key_path, 0o644)
+            logger.info("RSA keys already exist, ensured correct permissions")
+        except OSError as e:
+            logger.warning("Failed to set permissions on existing keys", error=str(e))
 
 
 async def init_index_when_ready():
diff --git a/src/services/auth_service.py b/src/services/auth_service.py
index 6b19f77a..f58997ac 100644
--- a/src/services/auth_service.py
+++ b/src/services/auth_service.py
@@ -296,11 +296,16 @@ class AuthService:
             try:
                 if self.langflow_mcp_service and isinstance(jwt_token, str) and jwt_token.strip():
                     global_vars = {"JWT": jwt_token}
+                    global_vars["CONNECTOR_TYPE_URL"] = "url"
                     if user_info:
                         if user_info.get("id"):
                             global_vars["OWNER"] = user_info.get("id")
                         if user_info.get("name"):
-                            global_vars["OWNER_NAME"] = user_info.get("name")
+                            # OWNER_NAME may contain spaces, which can cause issues in headers, so it is wrapped in double quotes.
+                            # Alternative (not implemented here): URL-encode the owner name to preserve spaces and special characters.
+                            owner_name = user_info.get("name")
+                            if owner_name:
+                                global_vars["OWNER_NAME"] = str(f"\"{owner_name}\"")
                         if user_info.get("email"):
                             global_vars["OWNER_EMAIL"] = user_info.get("email")
 
diff --git a/src/services/document_service.py b/src/services/document_service.py
index 5204ea0e..d596fb25 100644
--- a/src/services/document_service.py
+++ b/src/services/document_service.py
@@ -126,7 +126,11 @@ class DocumentService:
         from utils.file_utils import auto_cleanup_tempfile
         import os
 
-        with auto_cleanup_tempfile() as tmp_path:
+        # Preserve file extension for docling format detection
+        filename = upload_file.filename or "uploaded"
+        suffix = os.path.splitext(filename)[1] or ""
+
+        with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
             # Stream upload file to temporary file
             file_size = 0
             with open(tmp_path, 'wb') as tmp_file:
diff --git a/src/services/models_service.py b/src/services/models_service.py
index 8c779940..01707dd2 100644
--- a/src/services/models_service.py
+++ b/src/services/models_service.py
@@ -242,6 +242,35 @@ class ModelsService:
                 headers["Authorization"] = f"Bearer {api_key}"
             if project_id:
                 headers["Project-ID"] = project_id
+
+            # Validate credentials with a minimal completion request
+            async with httpx.AsyncClient() as client:
+                validation_url = f"{watson_endpoint}/ml/v1/text/generation"
+                validation_params = {"version": "2024-09-16"}
+                validation_payload = {
+                    "input": "test",
+                    "model_id": "ibm/granite-3-2b-instruct",
+                    "project_id": project_id,
+                    "parameters": {
+                        "max_new_tokens": 1,
+                    },
+                }
+
+                validation_response = await client.post(
+                    validation_url,
+                    headers=headers,
+                    params=validation_params,
+                    json=validation_payload,
+                    timeout=10.0,
+                )
+
+                if validation_response.status_code != 200:
+                    raise Exception(
+                        f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
+                    )
+
+                logger.info("IBM Watson credentials validated successfully")
+
             # Fetch foundation models using the correct endpoint
             models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"
 
diff --git a/src/tui/__init__.py b/src/tui/__init__.py
index ab225908..0437803a 100644
--- a/src/tui/__init__.py
+++ b/src/tui/__init__.py
@@ -1 +1,8 @@
 """OpenRAG Terminal User Interface package."""
+
+from importlib.metadata import version
+
+try:
+    __version__ = version("openrag")
+except Exception:
+    __version__ = "unknown"
diff --git a/src/tui/_assets/docker-compose-cpu.yml b/src/tui/_assets/docker-compose-cpu.yml
deleted file mode 100644
index 1086737b..00000000
--- a/src/tui/_assets/docker-compose-cpu.yml
+++ /dev/null
@@ -1,121 +0,0 @@
-services:
-  opensearch:
-    image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
-    #build:
-    #  context: .
-    #  dockerfile: Dockerfile
-    container_name: os
-    depends_on:
-      - openrag-backend
-    environment:
-      - discovery.type=single-node
-      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
-    # Run security setup in background after OpenSearch starts
-    command: >
-      bash -c "
-        # Start OpenSearch in background
-        /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
-
-        # Wait a bit for OpenSearch to start, then apply security config
-        sleep 10 && /usr/share/opensearch/setup-security.sh &
-
-        # Wait for background processes
-        wait
-      "
-    ports:
-      - "9200:9200"
-      - "9600:9600"
-
-  dashboards:
-    image: opensearchproject/opensearch-dashboards:3.0.0
-    container_name: osdash
-    depends_on:
-      - opensearch
-    environment:
-      OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
-      OPENSEARCH_USERNAME: "admin"
-      OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
-    ports:
-      - "5601:5601"
-
-  openrag-backend:
-    image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
-    #build:
-    #context: .
-    #dockerfile: Dockerfile.backend
-    container_name: openrag-backend
-    depends_on:
-      - langflow
-    environment:
-      - OPENSEARCH_HOST=opensearch
-      - LANGFLOW_URL=http://langflow:7860
-      - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
-      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
-      - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
-      - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
-      - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
-      - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
-      - OPENSEARCH_PORT=9200
-      - OPENSEARCH_USERNAME=admin
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-      - NVIDIA_VISIBLE_DEVICES=all
-      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
-      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
-      - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
-      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
-      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
-    volumes:
-      - ./documents:/app/documents:Z
-      - ./keys:/app/keys:Z
-      - ./flows:/app/flows:Z
-
-  openrag-frontend:
-    image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
-    #build:
-    #context: .
-    #dockerfile: Dockerfile.frontend
-    container_name: openrag-frontend
-    depends_on:
-      - openrag-backend
-    environment:
-      - OPENRAG_BACKEND_HOST=openrag-backend
-    ports:
-      - "3000:3000"
-
-  langflow:
-    volumes:
-      - ./flows:/app/flows:Z
-    image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
-    container_name: langflow
-    ports:
-      - "7860:7860"
-    environment:
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
-      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
-      - JWT=None  
-      - OWNER=None
-      - OWNER_NAME=None
-      - OWNER_EMAIL=None
-      - CONNECTOR_TYPE=system
-      - OPENRAG-QUERY-FILTER="{}"
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - FILENAME=None
-      - MIMETYPE=None
-      - FILESIZE=0
-      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
-      - LANGFLOW_LOG_LEVEL=DEBUG
-      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
-      - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
-      # - DEFAULT_FOLDER_NAME=OpenRAG
-      - HIDE_GETTING_STARTED_PROGRESS=true
diff --git a/src/tui/_assets/docker-compose-cpu.yml b/src/tui/_assets/docker-compose-cpu.yml
new file mode 120000
index 00000000..5ad7a663
--- /dev/null
+++ b/src/tui/_assets/docker-compose-cpu.yml
@@ -0,0 +1 @@
+../../../docker-compose-cpu.yml
\ No newline at end of file
diff --git a/src/tui/_assets/docker-compose.yml b/src/tui/_assets/docker-compose.yml
deleted file mode 100644
index 32b72c65..00000000
--- a/src/tui/_assets/docker-compose.yml
+++ /dev/null
@@ -1,121 +0,0 @@
-services:
-  opensearch:
-    image: phact/openrag-opensearch:${OPENRAG_VERSION:-latest}
-    #build:
-    #context: .
-    #dockerfile: Dockerfile
-    container_name: os
-    depends_on:
-      - openrag-backend
-    environment:
-      - discovery.type=single-node
-      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
-    # Run security setup in background after OpenSearch starts
-    command: >
-      bash -c "
-        # Start OpenSearch in background
-        /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch &
-
-        # Wait a bit for OpenSearch to start, then apply security config
-        sleep 10 && /usr/share/opensearch/setup-security.sh &
-
-        # Wait for background processes
-        wait
-      "
-    ports:
-      - "9200:9200"
-      - "9600:9600"
-
-  dashboards:
-    image: opensearchproject/opensearch-dashboards:3.0.0
-    container_name: osdash
-    depends_on:
-      - opensearch
-    environment:
-      OPENSEARCH_HOSTS: '["https://opensearch:9200"]'
-      OPENSEARCH_USERNAME: "admin"
-      OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD}
-    ports:
-      - "5601:5601"
-
-  openrag-backend:
-    image: phact/openrag-backend:${OPENRAG_VERSION:-latest}
-    #build:
-    #context: .
-    #dockerfile: Dockerfile.backend
-    container_name: openrag-backend
-    depends_on:
-      - langflow
-    environment:
-      - OPENSEARCH_HOST=opensearch
-      - LANGFLOW_URL=http://langflow:7860
-      - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID}
-      - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID}
-      - LANGFLOW_URL_INGEST_FLOW_ID=${LANGFLOW_URL_INGEST_FLOW_ID}
-      - DISABLE_INGEST_WITH_LANGFLOW=${DISABLE_INGEST_WITH_LANGFLOW:-false}
-      - NUDGES_FLOW_ID=${NUDGES_FLOW_ID}
-      - OPENSEARCH_PORT=9200
-      - OPENSEARCH_USERNAME=admin
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-      - NVIDIA_VISIBLE_DEVICES=all
-      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID}
-      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_ID=${MICROSOFT_GRAPH_OAUTH_CLIENT_ID}
-      - MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=${MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET}
-      - WEBHOOK_BASE_URL=${WEBHOOK_BASE_URL}
-      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
-      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
-    volumes:
-      - ./documents:/app/documents:Z
-      - ./keys:/app/keys:Z
-      - ./flows:/app/flows:Z
-    gpus: all
-
-  openrag-frontend:
-    image: phact/openrag-frontend:${OPENRAG_VERSION:-latest}
-    #build:
-    #context: .
-    #dockerfile: Dockerfile.frontend
-    container_name: openrag-frontend
-    depends_on:
-      - openrag-backend
-    environment:
-      - OPENRAG_BACKEND_HOST=openrag-backend
-    ports:
-      - "3000:3000"
-
-  langflow:
-    volumes:
-      - ./flows:/app/flows:Z
-    image: phact/openrag-langflow:${LANGFLOW_VERSION:-latest}
-    container_name: langflow
-    ports:
-      - "7860:7860"
-    environment:
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - LANGFLOW_LOAD_FLOWS_PATH=/app/flows
-      - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY}
-      - JWT=None  
-      - OWNER=None
-      - OWNER_NAME=None
-      - OWNER_EMAIL=None
-      - CONNECTOR_TYPE=system
-      - OPENRAG-QUERY-FILTER="{}"
-      - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
-      - FILENAME=None
-      - MIMETYPE=None
-      - FILESIZE=0
-      - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE,FILENAME,MIMETYPE,FILESIZE
-      - LANGFLOW_LOG_LEVEL=DEBUG
-      - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN}
-      - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER}
-      - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD}
-      - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE}
-      - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI}
-      # - DEFAULT_FOLDER_NAME="OpenRAG"
-      - HIDE_GETTING_STARTED_PROGRESS=true
diff --git a/src/tui/_assets/docker-compose.yml b/src/tui/_assets/docker-compose.yml
new file mode 120000
index 00000000..5abefb89
--- /dev/null
+++ b/src/tui/_assets/docker-compose.yml
@@ -0,0 +1 @@
+../../../docker-compose.yml
\ No newline at end of file
diff --git a/src/tui/_assets/documents/2506.08231v1.pdf b/src/tui/_assets/documents/2506.08231v1.pdf
deleted file mode 100644
index 61e83265..00000000
Binary files a/src/tui/_assets/documents/2506.08231v1.pdf and /dev/null differ
diff --git a/src/tui/_assets/documents/2506.08231v1.pdf b/src/tui/_assets/documents/2506.08231v1.pdf
new file mode 120000
index 00000000..079e1ace
--- /dev/null
+++ b/src/tui/_assets/documents/2506.08231v1.pdf
@@ -0,0 +1 @@
+../../../../documents/2506.08231v1.pdf
\ No newline at end of file
diff --git a/src/tui/_assets/documents/ai-human-resources.pdf b/src/tui/_assets/documents/ai-human-resources.pdf
deleted file mode 100644
index 5e36eab4..00000000
Binary files a/src/tui/_assets/documents/ai-human-resources.pdf and /dev/null differ
diff --git a/src/tui/_assets/documents/ai-human-resources.pdf b/src/tui/_assets/documents/ai-human-resources.pdf
new file mode 120000
index 00000000..ba76acc5
--- /dev/null
+++ b/src/tui/_assets/documents/ai-human-resources.pdf
@@ -0,0 +1 @@
+../../../../documents/ai-human-resources.pdf
\ No newline at end of file
diff --git a/src/tui/_assets/documents/warmup_ocr.pdf b/src/tui/_assets/documents/warmup_ocr.pdf
deleted file mode 100644
index 8b17f8b2..00000000
Binary files a/src/tui/_assets/documents/warmup_ocr.pdf and /dev/null differ
diff --git a/src/tui/_assets/documents/warmup_ocr.pdf b/src/tui/_assets/documents/warmup_ocr.pdf
new file mode 120000
index 00000000..10a7670a
--- /dev/null
+++ b/src/tui/_assets/documents/warmup_ocr.pdf
@@ -0,0 +1 @@
+../../../../documents/warmup_ocr.pdf
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/ollama_embedding.json b/src/tui/_assets/flows/components/ollama_embedding.json
new file mode 120000
index 00000000..0e3a7516
--- /dev/null
+++ b/src/tui/_assets/flows/components/ollama_embedding.json
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_embedding.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/ollama_llm.json b/src/tui/_assets/flows/components/ollama_llm.json
new file mode 120000
index 00000000..30c18f43
--- /dev/null
+++ b/src/tui/_assets/flows/components/ollama_llm.json
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_llm.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/ollama_llm_text.json b/src/tui/_assets/flows/components/ollama_llm_text.json
new file mode 120000
index 00000000..1b55fd42
--- /dev/null
+++ b/src/tui/_assets/flows/components/ollama_llm_text.json
@@ -0,0 +1 @@
+../../../../../flows/components/ollama_llm_text.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/watsonx_embedding.json b/src/tui/_assets/flows/components/watsonx_embedding.json
new file mode 120000
index 00000000..3d349dac
--- /dev/null
+++ b/src/tui/_assets/flows/components/watsonx_embedding.json
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_embedding.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/watsonx_llm.json b/src/tui/_assets/flows/components/watsonx_llm.json
new file mode 120000
index 00000000..d19d7004
--- /dev/null
+++ b/src/tui/_assets/flows/components/watsonx_llm.json
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_llm.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/components/watsonx_llm_text.json b/src/tui/_assets/flows/components/watsonx_llm_text.json
new file mode 120000
index 00000000..8f760b2d
--- /dev/null
+++ b/src/tui/_assets/flows/components/watsonx_llm_text.json
@@ -0,0 +1 @@
+../../../../../flows/components/watsonx_llm_text.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/ingestion_flow.json b/src/tui/_assets/flows/ingestion_flow.json
new file mode 120000
index 00000000..6a00e536
--- /dev/null
+++ b/src/tui/_assets/flows/ingestion_flow.json
@@ -0,0 +1 @@
+../../../../flows/ingestion_flow.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/openrag_agent.json b/src/tui/_assets/flows/openrag_agent.json
new file mode 120000
index 00000000..fab81ca0
--- /dev/null
+++ b/src/tui/_assets/flows/openrag_agent.json
@@ -0,0 +1 @@
+../../../../flows/openrag_agent.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/openrag_ingest_docling.json b/src/tui/_assets/flows/openrag_ingest_docling.json
new file mode 120000
index 00000000..a23a93dc
--- /dev/null
+++ b/src/tui/_assets/flows/openrag_ingest_docling.json
@@ -0,0 +1 @@
+../../../../flows/openrag_ingest_docling.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/openrag_nudges.json b/src/tui/_assets/flows/openrag_nudges.json
new file mode 120000
index 00000000..b343ba5d
--- /dev/null
+++ b/src/tui/_assets/flows/openrag_nudges.json
@@ -0,0 +1 @@
+../../../../flows/openrag_nudges.json
\ No newline at end of file
diff --git a/src/tui/_assets/flows/openrag_url_mcp.json b/src/tui/_assets/flows/openrag_url_mcp.json
new file mode 120000
index 00000000..afb2d00e
--- /dev/null
+++ b/src/tui/_assets/flows/openrag_url_mcp.json
@@ -0,0 +1 @@
+../../../../flows/openrag_url_mcp.json
\ No newline at end of file
diff --git a/src/tui/main.py b/src/tui/main.py
index b68293fe..beee4497 100644
--- a/src/tui/main.py
+++ b/src/tui/main.py
@@ -2,6 +2,7 @@
 
 import sys
 from pathlib import Path
+from typing import Iterable, Optional
 from textual.app import App, ComposeResult
 from utils.logging_config import get_logger
 try:
@@ -305,41 +306,103 @@ class OpenRAGTUI(App):
         return True, "Runtime requirements satisfied"
 
 
-def copy_sample_documents():
+def _copy_assets(resource_tree, destination: Path, allowed_suffixes: Optional[Iterable[str]] = None, *, force: bool = False) -> None:
+    """Copy packaged assets into destination and optionally overwrite existing files.
+
+    When ``force`` is True, files are refreshed if the packaged bytes differ.
+    """
+    destination.mkdir(parents=True, exist_ok=True)
+
+    for resource in resource_tree.iterdir():
+        target_path = destination / resource.name
+
+        if resource.is_dir():
+            _copy_assets(resource, target_path, allowed_suffixes, force=force)
+            continue
+
+        if allowed_suffixes and not any(resource.name.endswith(suffix) for suffix in allowed_suffixes):
+            continue
+        resource_bytes = resource.read_bytes()
+
+        if target_path.exists():
+            if not force:
+                continue
+
+            try:
+                if target_path.read_bytes() == resource_bytes:
+                    continue
+            except Exception as read_error:
+                logger.debug(f"Failed to read existing asset {target_path}: {read_error}")
+
+        target_path.write_bytes(resource_bytes)
+        logger.info(f"Copied bundled asset: {target_path}")
+
+
+def copy_sample_documents(*, force: bool = False) -> None:
     """Copy sample documents from package to current directory if they don't exist."""
     documents_dir = Path("documents")
 
-    # Check if documents directory already exists and has files
-    if documents_dir.exists() and any(documents_dir.glob("*.pdf")):
-        return  # Documents already exist, don't overwrite
-
     try:
-        # Get sample documents from package assets
         assets_files = files("tui._assets.documents")
-
-        # Create documents directory if it doesn't exist
-        documents_dir.mkdir(exist_ok=True)
-
-        # Copy each sample document
-        for resource in assets_files.iterdir():
-            if resource.is_file() and resource.name.endswith('.pdf'):
-                dest_path = documents_dir / resource.name
-                if not dest_path.exists():
-                    content = resource.read_bytes()
-                    dest_path.write_bytes(content)
-                    logger.info(f"Copied sample document: {resource.name}")
-
+        _copy_assets(assets_files, documents_dir, allowed_suffixes=(".pdf",), force=force)
     except Exception as e:
         logger.debug(f"Could not copy sample documents: {e}")
         # This is not a critical error - the app can work without sample documents
 
 
+def copy_sample_flows(*, force: bool = False) -> None:
+    """Copy bundled .json flows into ./flows; existing files are left alone unless ``force`` is True."""
+    flows_dir = Path("flows")
+
+    try:
+        assets_files = files("tui._assets.flows")
+        _copy_assets(assets_files, flows_dir, allowed_suffixes=(".json",), force=force)
+    except Exception as e:
+        logger.debug(f"Could not copy sample flows: {e}")
+        # The app can proceed without bundled flows
+
+
+def copy_compose_files(*, force: bool = False) -> None:
+    """Copy docker-compose templates into the workspace; existing files are replaced only when ``force`` is True."""
+    try:
+        assets_root = files("tui._assets")
+    except Exception as e:
+        logger.debug(f"Could not access compose assets: {e}")
+        return
+
+    for filename in ("docker-compose.yml", "docker-compose-cpu.yml"):
+        destination = Path(filename)
+        if destination.exists() and not force:
+            continue
+
+        try:
+            resource = assets_root.joinpath(filename)
+            if not resource.is_file():
+                logger.debug(f"Compose template not found in assets: {filename}")
+                continue
+
+            resource_bytes = resource.read_bytes()
+            if destination.exists():
+                try:
+                    if destination.read_bytes() == resource_bytes:
+                        continue
+                except Exception as read_error:
+                    logger.debug(f"Failed to read existing compose file {destination}: {read_error}")
+
+            destination.write_bytes(resource_bytes)
+            logger.info(f"Copied docker-compose template: {filename}")
+        except Exception as error:
+            logger.debug(f"Could not copy compose file {filename}: {error}")
+
+
 def run_tui():
     """Run the OpenRAG TUI application."""
     app = None
     try:
-        # Copy sample documents on first run
-        copy_sample_documents()
+        # Keep bundled assets aligned with the packaged versions
+        copy_sample_documents(force=True)
+        copy_sample_flows(force=True)
+        copy_compose_files(force=True)
 
         app = OpenRAGTUI()
         app.run()
diff --git a/src/tui/managers/docling_manager.py b/src/tui/managers/docling_manager.py
index 7cb5d1e8..e58a5b1e 100644
--- a/src/tui/managers/docling_manager.py
+++ b/src/tui/managers/docling_manager.py
@@ -8,7 +8,6 @@ import threading
 import time
 from typing import Optional, Tuple, Dict, Any, List, AsyncIterator
 from utils.logging_config import get_logger
-from utils.container_utils import guess_host_ip_for_containers
 
 logger = get_logger(__name__)
 
@@ -32,7 +31,8 @@ class DoclingManager:
 
         self._process: Optional[subprocess.Popen] = None
         self._port = 5001
-        self._host = guess_host_ip_for_containers(logger=logger)  # Get appropriate host IP based on runtime
+        # Bind to all interfaces by default (can be overridden with DOCLING_BIND_HOST env var)
+        self._host = os.getenv('DOCLING_BIND_HOST', '0.0.0.0')
         self._running = False
         self._external_process = False
 
@@ -150,16 +150,20 @@ class DoclingManager:
             else:
                 pid = self._load_pid()
 
+            # Use localhost for display URLs when bound to 0.0.0.0
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
+
             return {
                 "status": "running",
                 "port": self._port,
                 "host": self._host,
-                "endpoint": f"http://{self._host}:{self._port}",
-                "docs_url": f"http://{self._host}:{self._port}/docs",
-                "ui_url": f"http://{self._host}:{self._port}/ui",
+                "endpoint": f"http://{display_host}:{self._port}",
+                "docs_url": f"http://{display_host}:{self._port}/docs",
+                "ui_url": f"http://{display_host}:{self._port}/ui",
                 "pid": pid
             }
         else:
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
             return {
                 "status": "stopped",
                 "port": self._port,
@@ -176,10 +180,9 @@ class DoclingManager:
             return False, "Docling serve is already running"
 
         self._port = port
-        # Use provided host or the bridge IP we detected in __init__
+        # Use provided host or keep default from __init__
         if host is not None:
             self._host = host
-        # else: keep self._host as already set in __init__
 
         # Check if port is already in use before trying to start
         import socket
@@ -293,7 +296,8 @@ class DoclingManager:
                 self._running = False
                 return False, f"Docling serve process exited immediately (code: {return_code})"
 
-            return True, f"Docling serve starting on http://{host}:{port}"
+            display_host = "localhost" if self._host == "0.0.0.0" else self._host
+            return True, f"Docling serve starting on http://{display_host}:{port}"
 
         except FileNotFoundError:
             return False, "docling-serve not available. Please install: uv add docling-serve"
@@ -454,7 +458,8 @@ class DoclingManager:
     async def follow_logs(self) -> AsyncIterator[str]:
         """Follow logs from the docling-serve process in real-time."""
         # First yield status message and any existing logs
-        status_msg = f"Docling serve is running on http://{self._host}:{self._port}"
+        display_host = "localhost" if self._host == "0.0.0.0" else self._host
+        status_msg = f"Docling serve is running on http://{display_host}:{self._port}"
 
         with self._log_lock:
             if self._log_buffer:
diff --git a/src/tui/screens/welcome.py b/src/tui/screens/welcome.py
index 217b0611..ea85de9e 100644
--- a/src/tui/screens/welcome.py
+++ b/src/tui/screens/welcome.py
@@ -10,6 +10,7 @@ from rich.text import Text
 from rich.align import Align
 from dotenv import load_dotenv
 
+from .. import __version__
 from ..managers.container_manager import ContainerManager, ServiceStatus
 from ..managers.env_manager import EnvManager
 from ..managers.docling_manager import DoclingManager
@@ -116,7 +117,8 @@ class WelcomeScreen(Screen):
 ╚═════╝ ╚═╝     ╚══════╝╚═╝  ╚═══╝╚═╝  ╚═╝╚═╝  ╚═╝╚═════╝
 """
         welcome_text.append(ascii_art, style="bold white")
-        welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim")
+        welcome_text.append("Terminal User Interface for OpenRAG\n", style="dim")
+        welcome_text.append(f"v{__version__}\n\n", style="dim cyan")
 
         # Check if all services are running
         all_services_running = self.services_running and self.docling_running
diff --git a/src/utils/container_utils.py b/src/utils/container_utils.py
index 14222c84..746379e8 100644
--- a/src/utils/container_utils.py
+++ b/src/utils/container_utils.py
@@ -157,10 +157,22 @@ def guess_host_ip_for_containers(logger=None) -> str:
     import logging
     import re
     import shutil
+    import socket
     import subprocess
 
     log = logger or logging.getLogger(__name__)
 
+    def can_bind_to_address(ip_addr: str) -> bool:
+        """Test if we can bind to the given IP address."""
+        try:
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+                sock.bind((ip_addr, 0))  # Port 0 = let OS choose a free port
+                return True
+        except (OSError, socket.error) as e:
+            log.debug("Cannot bind to %s: %s", ip_addr, e)
+            return False
+
     def run(cmd, timeout=2, text=True):
         return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)
 
@@ -261,10 +273,23 @@ def guess_host_ip_for_containers(logger=None) -> str:
                 "Container-reachable host IP candidates: %s",
                 ", ".join(ordered_candidates),
             )
-        else:
-            log.info("Container-reachable host IP: %s", ordered_candidates[0])
 
-        return ordered_candidates[0]
+        # Try each candidate and return the first one we can bind to
+        for ip_addr in ordered_candidates:
+            if can_bind_to_address(ip_addr):
+                if len(ordered_candidates) > 1:
+                    log.info("Selected bindable host IP: %s", ip_addr)
+                else:
+                    log.info("Container-reachable host IP: %s", ip_addr)
+                return ip_addr
+            log.debug("Skipping %s (cannot bind)", ip_addr)
+
+        # None of the candidates were bindable, fall back to 127.0.0.1
+        log.warning(
+            "None of the discovered IPs (%s) can be bound; falling back to 127.0.0.1",
+            ", ".join(ordered_candidates),
+        )
+        return "127.0.0.1"
 
     log.warning(
         "No container bridge IP found. For rootless Podman (slirp4netns) there may be no host bridge; publish ports or use 10.0.2.2 from the container."
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..5f19b37d
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+# Test package
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..7c2ffc1d
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,85 @@
+import asyncio
+import os
+import tempfile
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Force no-auth mode for testing by setting OAuth credentials to empty strings
+# This ensures anonymous JWT tokens are created automatically
+os.environ['GOOGLE_OAUTH_CLIENT_ID'] = ''
+os.environ['GOOGLE_OAUTH_CLIENT_SECRET'] = ''
+
+from src.config.settings import clients
+from src.session_manager import SessionManager
+from src.main import generate_jwt_keys
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Provide a single event loop for the whole test session; closed at teardown."""
+    session_loop = asyncio.new_event_loop()
+    yield session_loop
+    session_loop.close()
+
+
+@pytest_asyncio.fixture
+async def opensearch_client():
+    """Initialize the shared clients and yield the OpenSearch client (needs a running OpenSearch)."""
+    await clients.initialize()
+    yield clients.opensearch
+    # Teardown: best-effort removal of the "test_documents" index
+    try:
+        await clients.opensearch.indices.delete(index="test_documents")
+    except Exception:
+        pass  # any deletion error is ignored (presumably the index may not exist)
+
+
+@pytest.fixture
+def session_manager():
+    """Return a SessionManager created with a fixed test secret; prints its key paths."""
+    # Generate the RSA keys first; the SessionManager is only constructed afterwards
+    generate_jwt_keys()
+    sm = SessionManager("test-secret-key")
+    print(f"[DEBUG] SessionManager created with keys: private={sm.private_key_path}, public={sm.public_key_path}")
+    return sm
+
+
+@pytest.fixture
+def test_documents_dir():
+    """Yield a temporary directory holding three markdown files plus one nested file."""
+    fixtures = {
+        "test1.md": "# Machine Learning Document\n\nThis is a test document about machine learning.",
+        "test2.md": "# AI Document\n\nAnother document discussing artificial intelligence.",
+        "test3.md": "# Data Science Document\n\nThis is a markdown file about data science.",
+        "subdir/nested.md": "# Neural Networks\n\nNested document about neural networks.",
+    }
+    with tempfile.TemporaryDirectory() as temp_dir:
+        root = Path(temp_dir)
+        # Entries are written in insertion order; "subdir" is created when first needed
+        for relative_name, body in fixtures.items():
+            target = root / relative_name
+            target.parent.mkdir(exist_ok=True)
+            target.write_text(body)
+        yield root
+
+
+@pytest.fixture
+def test_single_file():
+    """Yield the path of a throwaway markdown file; the file is removed at teardown."""
+    document_text = "# Single Test Document\n\nThis is a test document about OpenRAG testing framework. This document contains multiple sentences to ensure proper chunking. The content should be indexed and searchable in OpenSearch after processing."
+    # delete=False keeps the file on disk once the handle is closed, so the
+    # consuming test can reopen it by name.
+    with tempfile.NamedTemporaryFile(mode='w', suffix='_test_document.md', delete=False) as handle:
+        handle.write(document_text)
+        temp_path = handle.name
+
+    yield temp_path
+
+    # Cleanup; a file that is already gone is not an error
+    Path(temp_path).unlink(missing_ok=True)
\ No newline at end of file
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 00000000..e27cd7ab
--- /dev/null
+++ b/tests/integration/__init__.py
@@ -0,0 +1 @@
+# Integration tests package
\ No newline at end of file
diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py
new file mode 100644
index 00000000..869928fe
--- /dev/null
+++ b/tests/integration/test_api_endpoints.py
@@ -0,0 +1,296 @@
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+import pytest
+
+
+async def wait_for_service_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
+    """Poll existing endpoints until the app and OpenSearch are ready.
+
+    Strategy:
+    - GET /auth/me should return 200 immediately (confirms app is up).
+    - POST /search with query "*" avoids embeddings and checks OpenSearch/index readiness.
+    """
+    # Diagnostics first: mint an anonymous JWT and print enough detail to debug auth failures
+    from src.session_manager import SessionManager, AnonymousUser
+    import hashlib
+    import jwt as jwt_lib
+    sm = SessionManager("test")
+    test_token = sm.create_jwt_token(AnonymousUser())
+    token_hash = hashlib.sha256(test_token.encode()).hexdigest()[:16]
+    print(f"[DEBUG] Generated test JWT token hash: {token_hash}")
+    print(f"[DEBUG] Using key paths: private={sm.private_key_path}, public={sm.public_key_path}")
+    with open(sm.public_key_path, 'rb') as f:
+        pub_key_hash = hashlib.sha256(f.read()).hexdigest()[:16]
+    print(f"[DEBUG] Public key hash: {pub_key_hash}")
+    # Decode token to see claims
+    decoded = jwt_lib.decode(test_token, options={"verify_signature": False})
+    print(f"[DEBUG] JWT claims: iss={decoded.get('iss')}, sub={decoded.get('sub')}, aud={decoded.get('aud')}, roles={decoded.get('roles')}")
+
+    # Probe OpenSearch JWT auth directly; diagnostic only, so a transport error must not abort the polling below
+    opensearch_url = f"https://{os.getenv('OPENSEARCH_HOST', 'localhost')}:{os.getenv('OPENSEARCH_PORT', '9200')}"
+    print(f"[DEBUG] Testing JWT auth directly against: {opensearch_url}/documents/_search")
+    try:
+        async with httpx.AsyncClient(verify=False) as os_client:
+            r_os = await os_client.post(
+                f"{opensearch_url}/documents/_search",
+                headers={"Authorization": f"Bearer {test_token}"},
+                json={"query": {"match_all": {}}, "size": 0}
+            )
+            print(f"[DEBUG] Direct OpenSearch JWT test: status={r_os.status_code}, body={r_os.text[:500]}")
+            verdict = "❌ OpenSearch rejected JWT! OIDC config not working." if r_os.status_code == 401 else "✓ OpenSearch accepted JWT!"
+            print(f"[DEBUG] {verdict}")
+    except httpx.HTTPError as e:
+        print(f"[DEBUG] Direct OpenSearch JWT test could not be performed: {e}")
+
+    deadline = asyncio.get_event_loop().time() + timeout_s
+    last_err = None
+    while asyncio.get_event_loop().time() < deadline:
+        try:
+            r1 = await client.get("/auth/me")
+            print(f"[DEBUG] /auth/me status={r1.status_code}, body={r1.text[:200]}")
+            if r1.status_code in (401, 403):
+                raise AssertionError(f"/auth/me returned {r1.status_code}: {r1.text}")
+            if r1.status_code != 200:
+                await asyncio.sleep(0.5)
+                continue
+            # match_all readiness probe; no embeddings
+            r2 = await client.post("/search", json={"query": "*", "limit": 0})
+            print(f"[DEBUG] /search status={r2.status_code}, body={r2.text[:200]}")
+            if r2.status_code in (401, 403):
+                print(f"[DEBUG] Search failed with auth error. Response: {r2.text}")
+                raise AssertionError(f"/search returned {r2.status_code}: {r2.text}")
+            if r2.status_code == 200:
+                print("[DEBUG] Service ready!")
+                return
+            last_err = r2.text
+        except AssertionError:
+            raise
+        except Exception as e:
+            last_err = str(e)
+            print(f"[DEBUG] Exception during readiness check: {e}")
+        await asyncio.sleep(0.5)
+    raise AssertionError(f"Service not ready in time: {last_err}")
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_upload_and_search_endpoint(tmp_path: Path, disable_langflow_ingest: bool):
+    """Boot the ASGI app and exercise /upload and /search endpoints."""
+    # Ensure we route uploads to traditional processor and disable startup ingest
+    os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
+    os.environ["DISABLE_STARTUP_INGEST"] = "true"
+    # Force no-auth mode so endpoints bypass authentication
+    os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+    os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+    # Import after env vars to ensure settings pick them up. Clear cached modules
+    import sys
+    # Clear cached modules so settings pick up env and router sees new flag
+    for mod in [
+        "src.api.router",
+        "api.router",  # Also clear the non-src path
+        "src.api.connector_router",
+        "api.connector_router",
+        "src.config.settings",
+        "config.settings",
+        "src.auth_middleware",
+        "auth_middleware",
+        "src.main",
+        "api",  # Clear the api package itself
+        "src.api",
+        "services",  # Clear services that import clients
+        "src.services",
+        "services.search_service",
+        "src.services.search_service",
+    ]:
+        sys.modules.pop(mod, None)
+    from src.main import create_app, startup_tasks
+    import src.api.router as upload_router
+    from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
+
+    # Ensure a clean index before startup
+    await clients.initialize()
+    try:
+        await clients.opensearch.indices.delete(index=INDEX_NAME)
+        # Wait for deletion to complete
+        await asyncio.sleep(1)
+    except Exception:
+        pass
+
+    app = await create_app()
+    # Manually run startup tasks since httpx ASGI transport here doesn't manage lifespan
+    await startup_tasks(app.state.services)
+
+    # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+    from src.main import _ensure_opensearch_index
+    await _ensure_opensearch_index()
+
+    # Verify index is truly empty after startup (asserted outside the try so it cannot be swallowed)
+    doc_count = None
+    try:
+        count_response = await clients.opensearch.count(index=INDEX_NAME)
+        doc_count = count_response.get('count', 0)
+    except Exception:
+        pass  # If count fails, the index might not exist yet, which is fine
+    assert not doc_count, f"Index should be empty after startup but contains {doc_count} documents"
+
+    transport = httpx.ASGITransport(app=app)
+    try:
+        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+            # Wait for app + OpenSearch readiness using existing endpoints
+            await wait_for_service_ready(client)
+
+            # Create a temporary markdown file to upload
+            file_path = tmp_path / "endpoint_test_doc.md"
+            file_text = (
+                "# Single Test Document\n\n"
+                "This is a test document about OpenRAG testing framework. "
+                "The content should be indexed and searchable in OpenSearch after processing."
+            )
+            file_path.write_text(file_text)
+
+            # POST via router (multipart)
+            files = {
+                "file": (
+                    file_path.name,
+                    file_path.read_bytes(),
+                    "text/markdown",
+                )
+            }
+            upload_resp = await client.post("/upload", files=files)
+            assert upload_resp.status_code == 201, upload_resp.text
+            body = upload_resp.json()  # decoded only after the status check so failures show the raw text
+            assert body.get("status") in {"indexed", "unchanged"}
+            assert isinstance(body.get("id"), str)
+
+            # Poll search for the specific content until it's indexed
+            async def _wait_for_indexed(timeout_s: float = 30.0):
+                deadline = asyncio.get_event_loop().time() + timeout_s
+                while asyncio.get_event_loop().time() < deadline:
+                    resp = await client.post(
+                        "/search",
+                        json={"query": "OpenRAG testing framework", "limit": 5},
+                    )
+                    if resp.status_code == 200 and resp.json().get("results"):
+                        return resp
+                    await asyncio.sleep(0.5)
+                return resp
+
+            search_resp = await _wait_for_indexed()
+
+            # POST /search
+            assert search_resp.status_code == 200, search_resp.text
+            search_body = search_resp.json()
+
+            # Basic shape and at least one hit
+            results = search_body.get("results")
+            assert isinstance(results, list)
+            assert results, "Uploaded document was not returned by /search before the timeout"
+            # Confirm our phrase is present in top result content
+            top = results[0]
+            assert "text" in top or "content" in top
+            text = top.get("text") or top.get("content")
+            assert isinstance(text, str)
+            assert "testing" in text.lower()
+    finally:
+        # Explicitly close global clients to avoid aiohttp warnings
+        from src.config.settings import clients
+        try:
+            await clients.close()
+        except Exception:
+            pass
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_router_upload_ingest_traditional(tmp_path: Path, disable_langflow_ingest: bool):
+    """Exercise the router endpoint to ensure it routes to traditional upload when Langflow ingest is disabled."""
+    os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = "true" if disable_langflow_ingest else "false"
+    os.environ["DISABLE_STARTUP_INGEST"] = "true"
+    os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+    os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+    import sys
+    for mod in [
+        "src.api.router",
+        "api.router",  # Also clear the non-src path
+        "src.api.connector_router",
+        "api.connector_router",
+        "src.config.settings",
+        "config.settings",
+        "src.auth_middleware",
+        "auth_middleware",
+        "src.main",
+        "api",  # Clear the api package itself
+        "src.api",
+        "services",  # Clear services that import clients
+        "src.services",
+        "services.search_service",
+        "src.services.search_service",
+    ]:
+        sys.modules.pop(mod, None)
+    from src.main import create_app, startup_tasks
+    import src.api.router as upload_router
+    from src.config.settings import clients, INDEX_NAME, DISABLE_INGEST_WITH_LANGFLOW
+
+    # Ensure a clean index before startup
+    await clients.initialize()
+    try:
+        await clients.opensearch.indices.delete(index=INDEX_NAME)
+        # Wait for deletion to complete
+        await asyncio.sleep(1)
+    except Exception:
+        pass
+
+    app = await create_app()
+    await startup_tasks(app.state.services)
+
+    # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+    from src.main import _ensure_opensearch_index
+    await _ensure_opensearch_index()
+
+    # Verify index is truly empty after startup (asserted outside the try so it cannot be swallowed)
+    doc_count = None
+    try:
+        count_response = await clients.opensearch.count(index=INDEX_NAME)
+        doc_count = count_response.get('count', 0)
+    except Exception:
+        pass  # If count fails, the index might not exist yet, which is fine
+    assert not doc_count, f"Index should be empty after startup but contains {doc_count} documents"
+    transport = httpx.ASGITransport(app=app)
+    try:
+        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+            await wait_for_service_ready(client)
+
+            file_path = tmp_path / "router_test_doc.md"
+            file_path.write_text("# Router Test\n\nThis file validates the upload router.")
+
+            files = {
+                "file": (
+                    file_path.name,
+                    file_path.read_bytes(),
+                    "text/markdown",
+                )
+            }
+
+            resp = await client.post("/router/upload_ingest", files=files)
+            # Check the status before decoding so a failure reports the raw response text
+            assert resp.status_code in (201, 202), resp.text
+            data = resp.json()
+            print(f"data: {data}")
+            if disable_langflow_ingest:
+                assert data.get("status") in {"indexed", "unchanged"}
+                assert isinstance(data.get("id"), str)
+            else:
+                # With Langflow ingest enabled the response carries a task id instead
+                assert isinstance(data.get("task_id"), str)
+                assert data.get("file_count") == 1
+    finally:
+        from src.config.settings import clients
+        try:
+            await clients.close()
+        except Exception:
+            pass
diff --git a/tests/integration/test_startup_ingest.py b/tests/integration/test_startup_ingest.py
new file mode 100644
index 00000000..b2243b33
--- /dev/null
+++ b/tests/integration/test_startup_ingest.py
@@ -0,0 +1,118 @@
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+import pytest
+
+
+async def wait_for_ready(client: httpx.AsyncClient, timeout_s: float = 30.0):
+    """Poll /auth/me and /search until both return 200; raise AssertionError on timeout."""
+    clock = asyncio.get_event_loop().time
+    give_up_at = clock() + timeout_s
+    failure = None
+    while clock() < give_up_at:
+        try:
+            me = await client.get("/auth/me")
+            if me.status_code == 200:
+                probe = await client.post("/search", json={"query": "*", "limit": 0})
+                if probe.status_code == 200:
+                    return
+                failure = probe.text
+        except Exception as exc:
+            failure = str(exc)
+        await asyncio.sleep(0.5)
+    raise AssertionError(f"Service not ready in time: {failure}")
+
+
+def count_files_in_documents() -> int:
+    """Count regular files, recursively, under ./documents; 0 when the directory is absent."""
+    docs_root = Path.cwd() / "documents"
+    entries = docs_root.rglob("*") if docs_root.is_dir() else ()
+    return len([entry for entry in entries if entry.is_file()])
+
+
+@pytest.mark.parametrize("disable_langflow_ingest", [True, False])
+@pytest.mark.asyncio
+async def test_startup_ingest_creates_task(disable_langflow_ingest: bool):
+    """Startup ingest should create a task whose file count matches ./documents."""
+    # Ensure startup ingest runs and choose pipeline per param
+    os.environ["DISABLE_STARTUP_INGEST"] = "false"
+    os.environ["DISABLE_INGEST_WITH_LANGFLOW"] = (
+        "true" if disable_langflow_ingest else "false"
+    )
+    # Force no-auth mode for simpler endpoint access
+    os.environ["GOOGLE_OAUTH_CLIENT_ID"] = ""
+    os.environ["GOOGLE_OAUTH_CLIENT_SECRET"] = ""
+
+    # Reload settings to pick up env for this test run
+    import sys
+    for mod in [
+        "src.api.router",
+        "src.api.connector_router",
+        "src.config.settings",
+        "src.auth_middleware",
+        "src.main",
+    ]:
+        sys.modules.pop(mod, None)
+
+    from src.main import create_app, startup_tasks
+    from src.config.settings import clients, INDEX_NAME
+
+    # Ensure a clean index before startup
+    await clients.initialize()
+    try:
+        await clients.opensearch.indices.delete(index=INDEX_NAME)
+    except Exception:
+        pass
+
+    app = await create_app()
+    # Trigger startup tasks explicitly
+    await startup_tasks(app.state.services)
+
+    # Ensure index exists for tests (startup_tasks only creates it if DISABLE_INGEST_WITH_LANGFLOW=True)
+    from src.main import _ensure_opensearch_index
+    await _ensure_opensearch_index()
+
+    transport = httpx.ASGITransport(app=app)
+    try:
+        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
+            await wait_for_ready(client)
+
+            expected_files = count_files_in_documents()
+            if expected_files == 0:
+                return  # Nothing to ingest, so skip the (up to 60s) task polling below
+
+            # Poll /tasks until we see at least one startup ingest task
+            async def _wait_for_task(timeout_s: float = 60.0):
+                deadline = asyncio.get_event_loop().time() + timeout_s
+                last = None
+                while asyncio.get_event_loop().time() < deadline:
+                    resp = await client.get("/tasks")
+                    if resp.status_code == 200:
+                        data = resp.json()
+                        last = data
+                        tasks = data.get("tasks") if isinstance(data, dict) else None
+                        if isinstance(tasks, list) and len(tasks) > 0:
+                            return tasks
+                    await asyncio.sleep(0.5)
+                return last.get("tasks") if isinstance(last, dict) else last
+
+            tasks = await _wait_for_task()
+            if not (isinstance(tasks, list) and len(tasks) > 0):
+                # Fallback: verify that documents were indexed as a sign of startup ingest
+                sr = await client.post("/search", json={"query": "*", "limit": 1})
+                assert sr.status_code == 200, sr.text
+                total = sr.json().get("total")
+                assert isinstance(total, int) and total > 0, "Startup ingest did not index documents"
+                return
+            newest = tasks[0]
+            assert "task_id" in newest
+            assert newest.get("total_files") == expected_files
+    finally:
+        # Explicitly close global clients to avoid aiohttp warnings
+        from src.config.settings import clients
+        try:
+            await clients.close()
+        except Exception:
+            pass
diff --git a/uv.lock b/uv.lock
index c9bc6714..14f76a60 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2,10 +2,10 @@ version = 1
 revision = 2
 requires-python = ">=3.13"
 resolution-markers = [
-    "sys_platform == 'darwin'",
-    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+    "sys_platform == 'darwin'",
 ]
 
 [[package]]
@@ -291,8 +291,8 @@ name = "click"
 version = "8.2.1"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
 ]
 dependencies = [
@@ -312,6 +312,67 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
 ]
 
+[[package]]
+name = "coverage"
+version = "7.10.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" },
+    { url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" },
+    { url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" },
+    { url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" },
+    { url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" },
+    { url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" },
+    { url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" },
+    { url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" },
+    { url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" },
+    { url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" },
+    { url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" },
+    { url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" },
+    { url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" },
+    { url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" },
+    { url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" },
+    { url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" },
+    { url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" },
+    { url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" },
+    { url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" },
+    { url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" },
+]
+
 [[package]]
 name = "cramjam"
 version = "2.11.0"
@@ -454,8 +515,8 @@ name = "dill"
 version = "0.4.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" }
@@ -619,8 +680,8 @@ name = "docling-mcp"
 version = "1.1.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
 ]
 dependencies = [
@@ -943,8 +1004,8 @@ name = "fsspec"
 version = "2025.5.1"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" }
@@ -1264,8 +1325,8 @@ name = "huggingface-hub"
 version = "0.33.2"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
 ]
 dependencies = [
@@ -1339,6 +1400,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
 ]
 
+[[package]]
+name = "iniconfig"
+version = "2.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -1960,8 +2030,8 @@ name = "multiprocess"
 version = "0.70.18"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
 ]
 dependencies = [
@@ -2282,7 +2352,7 @@ wheels = [
 
 [[package]]
 name = "openrag"
-version = "0.1.14.dev3"
+version = "0.1.19"
 source = { editable = "." }
 dependencies = [
     { name = "agentd" },
@@ -2312,6 +2382,14 @@ dependencies = [
     { name = "uvicorn" },
 ]
 
+[package.dev-dependencies]
+dev = [
+    { name = "pytest" },
+    { name = "pytest-asyncio" },
+    { name = "pytest-cov" },
+    { name = "pytest-mock" },
+]
+
 [package.metadata]
 requires-dist = [
     { name = "agentd", specifier = ">=0.2.2" },
@@ -2341,6 +2419,14 @@ requires-dist = [
     { name = "uvicorn", specifier = ">=0.35.0" },
 ]
 
+[package.metadata.requires-dev]
+dev = [
+    { name = "pytest", specifier = ">=8" },
+    { name = "pytest-asyncio", specifier = ">=0.21.0" },
+    { name = "pytest-cov", specifier = ">=4.0.0" },
+    { name = "pytest-mock", specifier = ">=3.12.0" },
+]
+
 [[package]]
 name = "opensearch-py"
 version = "3.0.0"
@@ -2836,6 +2922,60 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810, upload-time = "2024-12-19T19:28:09.857Z" },
 ]
 
+[[package]]
+name = "pytest"
+version = "8.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "iniconfig" },
+    { name = "packaging" },
+    { name = "pluggy" },
+    { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
+]
+
+[[package]]
+name = "pytest-asyncio"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
+]
+
+[[package]]
+name = "pytest-cov"
+version = "7.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "coverage" },
+    { name = "pluggy" },
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
+]
+
+[[package]]
+name = "pytest-mock"
+version = "3.15.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
+]
+
 [[package]]
 name = "python-bidi"
 version = "0.6.6"
@@ -3622,9 +3762,9 @@ name = "torch"
 version = "2.8.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "sys_platform == 'darwin'",
     "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+    "sys_platform == 'darwin'",
 ]
 dependencies = [
     { name = "filelock", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },
@@ -3669,9 +3809,9 @@ name = "torchvision"
 version = "0.23.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "sys_platform == 'darwin'",
     "platform_machine == 'aarch64' and sys_platform == 'linux'",
     "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
+    "sys_platform == 'darwin'",
 ]
 dependencies = [
     { name = "numpy", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'" },