diff --git a/.dockerignore b/.dockerignore
index 8e0ed179..3f0066a9 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,2 +1,49 @@
+# Environment files
 .env
+.env.local
+.env.development
+.env.production
+
+# Auth files
 .drive.json
+*.json
+
+# Dependencies
+node_modules/
+*/node_modules/
+**/node_modules/
+
+# Python cache
+__pycache__/
+*/__pycache__/
+**/__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+
+# Build outputs
+build/
+dist/
+.next/
+out/
+
+# Development files
+.git/
+.gitignore
+README.md
+*.md
+.vscode/
+.idea/
+
+# Logs
+*.log
+logs/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Temporary files
+tmp/
+temp/
diff --git a/.env.example b/.env.example
index 865e1fa3..6bb49c99 100644
--- a/.env.example
+++ b/.env.example
@@ -1,15 +1,24 @@
-# flow id from the the openrag flow json
-FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
+# make one like so https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key
+LANGFLOW_SECRET_KEY=
+
+# flow ids for chat and ingestion flows
+LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
+LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
 NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c
+
 # Set a strong admin password for OpenSearch; a bcrypt hash is generated at
 # container startup from this value. Do not commit real secrets.
+# Must match the hashed password in secureconfig; change this for any secure deployment!
 OPENSEARCH_PASSWORD=
-# make here https://console.cloud.google.com/apis/credentials
+
+# make here https://console.cloud.google.com/apis/credentials
 GOOGLE_OAUTH_CLIENT_ID=
 GOOGLE_OAUTH_CLIENT_SECRET=
+
 # Azure app registration credentials for SharePoint/OneDrive
 MICROSOFT_GRAPH_OAUTH_CLIENT_ID=
 MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=
+
+# OPTIONAL: DNS routable from Google (etc.) to handle continuous ingest (something like ngrok works). This enables continuous ingestion
+WEBHOOK_BASE_URL=
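Note: LANGFLOW_SECRET_KEY above should be a strong random string. A minimal way to generate one locally (assuming Python 3 or OpenSSL is on your PATH; either output works):

    python3 -c "from secrets import token_urlsafe; print(token_urlsafe(32))"
    # or, equivalently:
    openssl rand -base64 32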
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..fe76467a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,210 @@
+# OpenRAG Development Makefile
+# Provides easy commands for development workflow
+
+.PHONY: help dev dev-cpu dev-local infra stop clean build logs shell-backend shell-frontend install test backend frontend install-be install-fe build-be build-fe logs-be logs-fe logs-lf logs-os shell-be shell-lf shell-os restart status health db-reset flow-upload quick setup
+
+# Default target
+help:
+	@echo "OpenRAG Development Commands"
+	@echo ""
+	@echo "Development:"
+	@echo "  dev        - Start full stack with GPU support (docker compose)"
+	@echo "  dev-cpu    - Start full stack with CPU only (docker compose)"
+	@echo "  dev-local  - Start infrastructure only, run backend/frontend locally"
+	@echo "  infra      - Start infrastructure services only (alias for dev-local)"
+	@echo "  stop       - Stop all containers"
+	@echo "  restart    - Restart all containers"
+	@echo ""
+	@echo "Local Development:"
+	@echo "  backend    - Run backend locally (requires infrastructure)"
+	@echo "  frontend   - Run frontend locally"
+	@echo "  install    - Install all dependencies"
+	@echo "  install-be - Install backend dependencies (uv)"
+	@echo "  install-fe - Install frontend dependencies (npm)"
+	@echo ""
+	@echo "Utilities:"
+	@echo "  build      - Build all Docker images"
+	@echo "  clean      - Stop containers and remove volumes"
+	@echo "  logs       - Show logs from all containers"
+	@echo "  logs-be    - Show backend container logs"
+	@echo "  logs-lf    - Show langflow container logs"
+	@echo "  shell-be   - Shell into backend container"
+	@echo "  shell-lf   - Shell into langflow container"
+	@echo ""
+	@echo "Testing:"
+	@echo "  test       - Run backend tests"
+	@echo "  lint       - Run linting checks"
+	@echo ""
+
+# Development environments
+dev:
+	@echo "🚀 Starting OpenRAG with GPU support..."
+	docker-compose up -d
+	@echo "✅ Services started!"
+	@echo "  Backend:    http://localhost:8000"
+	@echo "  Frontend:   http://localhost:3000"
+	@echo "  Langflow:   http://localhost:7860"
+	@echo "  OpenSearch: http://localhost:9200"
+	@echo "  Dashboards: http://localhost:5601"
+
+dev-cpu:
+	@echo "🚀 Starting OpenRAG with CPU only..."
+	docker-compose -f docker-compose-cpu.yml up -d
+	@echo "✅ Services started!"
+	@echo "  Backend:    http://localhost:8000"
+	@echo "  Frontend:   http://localhost:3000"
+	@echo "  Langflow:   http://localhost:7860"
+	@echo "  OpenSearch: http://localhost:9200"
+	@echo "  Dashboards: http://localhost:5601"
+
+dev-local:
+	@echo "🔧 Starting infrastructure only (for local development)..."
+	docker-compose up -d opensearch dashboards langflow
+	@echo "✅ Infrastructure started!"
+	@echo "  Langflow:   http://localhost:7860"
+	@echo "  OpenSearch: http://localhost:9200"
+	@echo "  Dashboards: http://localhost:5601"
+	@echo ""
+	@echo "Now run 'make backend' and 'make frontend' in separate terminals"
+
+infra:
+	@echo "🔧 Starting infrastructure services only..."
+	docker-compose up -d opensearch dashboards langflow
+	@echo "✅ Infrastructure services started!"
+	@echo "  Langflow:   http://localhost:7860"
+	@echo "  OpenSearch: http://localhost:9200"
+	@echo "  Dashboards: http://localhost:5601"
+
+# Container management
+stop:
+	@echo "🛑 Stopping all containers..."
+	docker-compose down
+	docker-compose -f docker-compose-cpu.yml down 2>/dev/null || true
+
+restart: stop dev
+
+clean: stop
+	@echo "🧹 Cleaning up containers and volumes..."
+	docker-compose down -v --remove-orphans
+	docker-compose -f docker-compose-cpu.yml down -v --remove-orphans 2>/dev/null || true
+	docker system prune -f
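Note: putting the dev-local message above into practice, the intended local loop is three terminals (a sketch using only targets defined in this Makefile):

    make dev-local   # terminal 1: OpenSearch, Dashboards, and Langflow in Docker
    make backend     # terminal 2: Python backend via uv on :8000
    make frontend    # terminal 3: Next.js dev server on :3000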
+# Local development
+backend:
+	@echo "🐍 Starting backend locally..."
+	@if [ ! -f .env ]; then echo "⚠️ .env file not found. Copy .env.example to .env first"; exit 1; fi
+	uv run python src/main.py
+
+frontend:
+	@echo "⚛️ Starting frontend locally..."
+	@if [ ! -d "frontend/node_modules" ]; then echo "📦 Installing frontend dependencies first..."; cd frontend && npm install; fi
+	cd frontend && npx next dev
+
+# Installation
+install: install-be install-fe
+	@echo "✅ All dependencies installed!"
+
+install-be:
+	@echo "📦 Installing backend dependencies..."
+	uv sync
+
+install-fe:
+	@echo "📦 Installing frontend dependencies..."
+	cd frontend && npm install
+
+# Building
+build:
+	@echo "🔨 Building Docker images..."
+	docker-compose build
+
+build-be:
+	@echo "🔨 Building backend image..."
+	docker build -t openrag-backend -f Dockerfile.backend .
+
+build-fe:
+	@echo "🔨 Building frontend image..."
+	docker build -t openrag-frontend -f Dockerfile.frontend .
+
+# Logging and debugging
+logs:
+	@echo "📋 Showing all container logs..."
+	docker-compose logs -f
+
+logs-be:
+	@echo "📋 Showing backend logs..."
+	docker-compose logs -f openrag-backend
+
+logs-fe:
+	@echo "📋 Showing frontend logs..."
+	docker-compose logs -f openrag-frontend
+
+logs-lf:
+	@echo "📋 Showing langflow logs..."
+	docker-compose logs -f langflow
+
+logs-os:
+	@echo "📋 Showing opensearch logs..."
+	docker-compose logs -f opensearch
+
+# Shell access
+shell-be:
+	@echo "🐚 Opening shell in backend container..."
+	docker-compose exec openrag-backend /bin/bash
+
+shell-lf:
+	@echo "🐚 Opening shell in langflow container..."
+	docker-compose exec langflow /bin/bash
+
+shell-os:
+	@echo "🐚 Opening shell in opensearch container..."
+	docker-compose exec opensearch /bin/bash
+
+# Testing and quality
+test:
+	@echo "🧪 Running backend tests..."
+	uv run pytest
+
+lint:
+	@echo "🔍 Running linting checks..."
+	cd frontend && npm run lint
+	@echo "Frontend linting complete"
+
+# Service status
+status:
+	@echo "📊 Container status:"
+	@docker-compose ps 2>/dev/null || echo "No containers running"
+
+health:
+	@echo "🏥 Health check:"
+	@echo "Backend: $$(curl -s http://localhost:8000/health 2>/dev/null || echo 'Not responding')"
+	@echo "Langflow: $$(curl -s http://localhost:7860/health 2>/dev/null || echo 'Not responding')"
+	@echo "OpenSearch: $$(curl -s -k -u admin:$(shell grep OPENSEARCH_PASSWORD .env | cut -d= -f2) https://localhost:9200 2>/dev/null | jq -r .tagline 2>/dev/null || echo 'Not responding')"
+
+# Database operations
+db-reset:
+	@echo "🗄️ Resetting OpenSearch indices..."
+	curl -X DELETE "http://localhost:9200/documents" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
+	curl -X DELETE "http://localhost:9200/knowledge_filters" -u admin:$$(grep OPENSEARCH_PASSWORD .env | cut -d= -f2) || true
+	@echo "Indices reset. Restart backend to recreate."
+
+# Flow management
+flow-upload:
+	@echo "📁 Uploading flow to Langflow..."
+	@if [ -z "$(FLOW_FILE)" ]; then echo "Usage: make flow-upload FLOW_FILE=path/to/flow.json"; exit 1; fi
+	curl -X POST "http://localhost:7860/api/v1/flows" \
+		-H "Content-Type: application/json" \
+		-d @$(FLOW_FILE)
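Note: as a concrete use of the flow-upload target above, the ingestion flow added by this PR can be registered with a local Langflow like so (the target simply POSTs the flow JSON to Langflow's /api/v1/flows endpoint):

    make flow-upload FLOW_FILE=flows/ingestion_flow.json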
+ @echo "Run these in separate terminals:" + @echo " make backend" + @echo " make frontend" + +# Environment setup +setup: + @echo "⚙️ Setting up development environment..." + @if [ ! -f .env ]; then cp .env.example .env && echo "📝 Created .env from template"; fi + @$(MAKE) install + @echo "✅ Setup complete! Run 'make dev' to start." \ No newline at end of file diff --git a/docker-compose-cpu.yml b/docker-compose-cpu.yml index 0414a523..132cb233 100644 --- a/docker-compose-cpu.yml +++ b/docker-compose-cpu.yml @@ -15,10 +15,10 @@ services: bash -c " # Start OpenSearch in background /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch & - + # Wait a bit for OpenSearch to start, then apply security config sleep 10 && /usr/share/opensearch/setup-security.sh & - + # Wait for background processes wait " @@ -53,7 +53,8 @@ services: - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY} - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER} - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD} - - FLOW_ID=${FLOW_ID} + - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID} + - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID} - NUDGES_FLOW_ID=${NUDGES_FLOW_ID} - OPENSEARCH_PORT=9200 - OPENSEARCH_USERNAME=admin @@ -98,7 +99,8 @@ services: - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY} - JWT="dummy" - OPENRAG-QUERY-FILTER="{}" - - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER + - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD} + - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD - LANGFLOW_LOG_LEVEL=DEBUG - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN} - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER} diff --git a/docker-compose.yml b/docker-compose.yml index 23499cb9..62bb8d2c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,10 +15,10 @@ services: bash -c " # Start OpenSearch in background /usr/share/opensearch/opensearch-docker-entrypoint.sh opensearch & - + # Wait a bit for OpenSearch to start, then apply security config sleep 10 && /usr/share/opensearch/setup-security.sh & - + # Wait for background processes wait " @@ -52,7 +52,8 @@ services: - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL} - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER} - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD} - - FLOW_ID=${FLOW_ID} + - LANGFLOW_CHAT_FLOW_ID=${LANGFLOW_CHAT_FLOW_ID} + - LANGFLOW_INGEST_FLOW_ID=${LANGFLOW_INGEST_FLOW_ID} - NUDGES_FLOW_ID=${NUDGES_FLOW_ID} - OPENSEARCH_PORT=9200 - OPENSEARCH_USERNAME=admin @@ -98,7 +99,8 @@ services: - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY} - JWT="dummy" - OPENRAG-QUERY-FILTER="{}" - - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER + - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD} + - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD - LANGFLOW_LOG_LEVEL=DEBUG - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN} - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER} diff --git a/flows/ingestion_flow.json b/flows/ingestion_flow.json new file mode 100644 index 00000000..dd039a37 --- /dev/null +++ b/flows/ingestion_flow.json @@ -0,0 +1,2032 @@ +{ + "data": { + "edges": [ + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "SplitText", + "id": "SplitText-QIKhg", + "name": "dataframe", + "output_types": [ + "DataFrame" + ] + }, + "targetHandle": { + "fieldName": "ingest_data", + "id": "OpenSearchHybrid-Ve6bS", + "inputTypes": [ + "Data", + "DataFrame" + ], + "type": "other" + } + }, + "id": 
"xy-edge__SplitText-QIKhg{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-QIKhgœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-OpenSearchHybrid-Ve6bS{œfieldNameœ:œingest_dataœ,œidœ:œOpenSearchHybrid-Ve6bSœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", + "selected": false, + "source": "SplitText-QIKhg", + "sourceHandle": "{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-QIKhgœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "OpenSearchHybrid-Ve6bS", + "targetHandle": "{œfieldNameœ:œingest_dataœ,œidœ:œOpenSearchHybrid-Ve6bSœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "OpenAIEmbeddings", + "id": "OpenAIEmbeddings-joRJ6", + "name": "embeddings", + "output_types": [ + "Embeddings" + ] + }, + "targetHandle": { + "fieldName": "embedding", + "id": "OpenSearchHybrid-Ve6bS", + "inputTypes": [ + "Embeddings" + ], + "type": "other" + } + }, + "id": "xy-edge__OpenAIEmbeddings-joRJ6{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-joRJ6œ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-OpenSearchHybrid-Ve6bS{œfieldNameœ:œembeddingœ,œidœ:œOpenSearchHybrid-Ve6bSœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "selected": false, + "source": "OpenAIEmbeddings-joRJ6", + "sourceHandle": "{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-joRJ6œ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}", + "target": "OpenSearchHybrid-Ve6bS", + "targetHandle": "{œfieldNameœ:œembeddingœ,œidœ:œOpenSearchHybrid-Ve6bSœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "File", + "id": "File-PSU37", + "name": "message", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "data_inputs", + "id": "SplitText-QIKhg", + "inputTypes": [ + "Data", + "DataFrame", + "Message" + ], + "type": "other" + } + }, + "id": "xy-edge__File-PSU37{œdataTypeœ:œFileœ,œidœ:œFile-PSU37œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-QIKhg{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-QIKhgœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", + "selected": false, + "source": "File-PSU37", + "sourceHandle": "{œdataTypeœ:œFileœ,œidœ:œFile-PSU37œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "target": "SplitText-QIKhg", + "targetHandle": "{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-QIKhgœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" + } + ], + "nodes": [ + { + "data": { + "description": "Split text into chunks based on specified criteria.", + "display_name": "Split Text", + "id": "SplitText-QIKhg", + "node": { + "base_classes": [ + "DataFrame" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Split text into chunks based on specified criteria.", + "display_name": "Split Text", + "documentation": "https://docs.langflow.org/components-processing#split-text", + "edited": true, + "field_order": [ + "data_inputs", + "chunk_overlap", + "chunk_size", + "separator", + "text_key", + "keep_separator" + ], + "frozen": false, + "icon": "scissors-line-dashed", + "legacy": false, + "lf_version": "1.5.0.post2", + "metadata": { + "code_hash": "65a90e1f4fe6", + "dependencies": { + "dependencies": [ + { + "name": "langchain_text_splitters", + "version": "0.3.9" + }, + { + "name": "langflow", + "version": "1.5.0.post2" + } + ], + "total_dependencies": 2 + }, + "module": "custom_components.split_text" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + 
"allows_loop": false, + "cache": true, + "display_name": "Chunks", + "group_outputs": false, + "hidden": null, + "method": "split_text", + "name": "dataframe", + "options": null, + "required_inputs": null, + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "chunk_overlap": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Chunk Overlap", + "dynamic": false, + "info": "Number of characters to overlap between chunks.", + "list": false, + "list_add_label": "Add More", + "name": "chunk_overlap", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 200 + }, + "chunk_size": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Chunk Size", + "dynamic": false, + "info": "The maximum length of each chunk. Text is first split by separator, then chunks are merged up to this size. Individual splits larger than this won't be further divided.", + "list": false, + "list_add_label": "Add More", + "name": "chunk_size", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 1000 + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n documentation: str = \"https://docs.langflow.org/components-processing#split-text\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Input\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. \"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . 
for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n data_list = [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n return data_list\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n elif isinstance(self.data_inputs, Message):\n self.data_inputs = [self.data_inputs.to_data()]\n return self.split_text_base()\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n self.log(documents)\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> DataFrame:\n return DataFrame(self._docs_to_data(self.split_text_base()))\n" + }, + "data_inputs": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Input", + "dynamic": false, + "info": "The data with texts to split in chunks.", + "input_types": [ + "Data", + "DataFrame", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "name": "data_inputs", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "keep_separator": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Keep Separator", + "dynamic": false, + "info": "Whether to keep the separator in the output chunks and where to place it.", + "name": 
"keep_separator", + "options": [ + "False", + "True", + "Start", + "End" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "False" + }, + "separator": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Separator", + "dynamic": false, + "info": "The character to split on. Use \\n for newline. Examples: \\n\\n for paragraphs, \\n for lines, . for sentences", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "\n" + }, + "text_key": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Text Key", + "dynamic": false, + "info": "The key to use for the text column.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "text_key", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "text" + } + }, + "tool_mode": false + }, + "selected_output": "chunks", + "type": "SplitText" + }, + "dragging": false, + "height": 475, + "id": "SplitText-QIKhg", + "measured": { + "height": 475, + "width": 320 + }, + "position": { + "x": 1729.1788373023007, + "y": 1330.8003441546418 + }, + "positionAbsolute": { + "x": 1683.4543896546102, + "y": 1350.7871623588553 + }, + "selected": false, + "type": "genericNode", + "width": 320 + }, + { + "data": { + "id": "OpenAIEmbeddings-joRJ6", + "node": { + "base_classes": [ + "Embeddings" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Generate embeddings using OpenAI models.", + "display_name": "OpenAI Embeddings", + "documentation": "", + "edited": false, + "field_order": [ + "default_headers", + "default_query", + "chunk_size", + "client", + "deployment", + "embedding_ctx_length", + "max_retries", + "model", + "model_kwargs", + "openai_api_key", + "openai_api_base", + "openai_api_type", + "openai_api_version", + "openai_organization", + "openai_proxy", + "request_timeout", + "show_progress_bar", + "skip_empty", + "tiktoken_model_name", + "tiktoken_enable", + "dimensions" + ], + "frozen": false, + "icon": "OpenAI", + "legacy": false, + "lf_version": "1.5.0.post2", + "metadata": { + "code_hash": "2691dee277c9", + "dependencies": { + "dependencies": [ + { + "name": "langchain_openai", + "version": "0.3.23" + }, + { + "name": "langflow", + "version": "1.5.0.post2" + } + ], + "total_dependencies": 2 + }, + "module": "langflow.components.openai.openai.OpenAIEmbeddingsComponent" + }, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Embedding Model", + "group_outputs": false, + "method": "build_embeddings", + "name": "embeddings", + "selected": "Embeddings", + "tool_mode": true, + "types": [ + "Embeddings" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "chunk_size": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Chunk Size", + "dynamic": false, + "info": "", + "list": false, + "name": "chunk_size", + "placeholder": "", + "required": false, + 
"show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": 1000 + }, + "client": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Client", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "name": "client", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langchain_openai import OpenAIEmbeddings\n\nfrom langflow.base.embeddings.model import LCEmbeddingsModel\nfrom langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom langflow.field_typing import Embeddings\nfrom langflow.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput\n\n\nclass OpenAIEmbeddingsComponent(LCEmbeddingsModel):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n icon = \"OpenAI\"\n name = \"OpenAIEmbeddings\"\n\n inputs = [\n DictInput(\n name=\"default_headers\",\n display_name=\"Default Headers\",\n advanced=True,\n info=\"Default headers to use for the API request.\",\n ),\n DictInput(\n name=\"default_query\",\n display_name=\"Default Query\",\n advanced=True,\n info=\"Default query parameters to use for the API request.\",\n ),\n IntInput(name=\"chunk_size\", display_name=\"Chunk Size\", advanced=True, value=1000),\n MessageTextInput(name=\"client\", display_name=\"Client\", advanced=True),\n MessageTextInput(name=\"deployment\", display_name=\"Deployment\", advanced=True),\n IntInput(name=\"embedding_ctx_length\", display_name=\"Embedding Context Length\", advanced=True, value=1536),\n IntInput(name=\"max_retries\", display_name=\"Max Retries\", value=3, advanced=True),\n DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n advanced=False,\n options=OPENAI_EMBEDDING_MODEL_NAMES,\n value=\"text-embedding-3-small\",\n ),\n DictInput(name=\"model_kwargs\", display_name=\"Model Kwargs\", advanced=True),\n SecretStrInput(name=\"openai_api_key\", display_name=\"OpenAI API Key\", value=\"OPENAI_API_KEY\", required=True),\n MessageTextInput(name=\"openai_api_base\", display_name=\"OpenAI API Base\", advanced=True),\n MessageTextInput(name=\"openai_api_type\", display_name=\"OpenAI API Type\", advanced=True),\n MessageTextInput(name=\"openai_api_version\", display_name=\"OpenAI API Version\", advanced=True),\n MessageTextInput(\n name=\"openai_organization\",\n display_name=\"OpenAI Organization\",\n advanced=True,\n ),\n MessageTextInput(name=\"openai_proxy\", display_name=\"OpenAI Proxy\", advanced=True),\n FloatInput(name=\"request_timeout\", display_name=\"Request Timeout\", advanced=True),\n BoolInput(name=\"show_progress_bar\", display_name=\"Show Progress Bar\", advanced=True),\n BoolInput(name=\"skip_empty\", display_name=\"Skip Empty\", advanced=True),\n MessageTextInput(\n name=\"tiktoken_model_name\",\n display_name=\"TikToken Model Name\",\n advanced=True,\n ),\n BoolInput(\n name=\"tiktoken_enable\",\n display_name=\"TikToken Enable\",\n advanced=True,\n value=True,\n info=\"If False, you must have 
transformers installed.\",\n ),\n IntInput(\n name=\"dimensions\",\n display_name=\"Dimensions\",\n info=\"The number of dimensions the resulting output embeddings should have. \"\n \"Only supported by certain models.\",\n advanced=True,\n ),\n ]\n\n def build_embeddings(self) -> Embeddings:\n return OpenAIEmbeddings(\n client=self.client or None,\n model=self.model,\n dimensions=self.dimensions or None,\n deployment=self.deployment or None,\n api_version=self.openai_api_version or None,\n base_url=self.openai_api_base or None,\n openai_api_type=self.openai_api_type or None,\n openai_proxy=self.openai_proxy or None,\n embedding_ctx_length=self.embedding_ctx_length,\n api_key=self.openai_api_key or None,\n organization=self.openai_organization or None,\n allowed_special=\"all\",\n disallowed_special=\"all\",\n chunk_size=self.chunk_size,\n max_retries=self.max_retries,\n timeout=self.request_timeout or None,\n tiktoken_enabled=self.tiktoken_enable,\n tiktoken_model_name=self.tiktoken_model_name or None,\n show_progress_bar=self.show_progress_bar,\n model_kwargs=self.model_kwargs,\n skip_empty=self.skip_empty,\n default_headers=self.default_headers or None,\n default_query=self.default_query or None,\n )\n" + }, + "default_headers": { + "_input_type": "DictInput", + "advanced": true, + "display_name": "Default Headers", + "dynamic": false, + "info": "Default headers to use for the API request.", + "list": false, + "name": "default_headers", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_input": true, + "type": "dict", + "value": {} + }, + "default_query": { + "_input_type": "DictInput", + "advanced": true, + "display_name": "Default Query", + "dynamic": false, + "info": "Default query parameters to use for the API request.", + "list": false, + "name": "default_query", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_input": true, + "type": "dict", + "value": {} + }, + "deployment": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Deployment", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "name": "deployment", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "dimensions": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Dimensions", + "dynamic": false, + "info": "The number of dimensions the resulting output embeddings should have. 
Only supported by certain models.", + "list": false, + "name": "dimensions", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "embedding_ctx_length": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Embedding Context Length", + "dynamic": false, + "info": "", + "list": false, + "name": "embedding_ctx_length", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": 1536 + }, + "max_retries": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Retries", + "dynamic": false, + "info": "", + "list": false, + "name": "max_retries", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "int", + "value": 3 + }, + "model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "display_name": "Model", + "dynamic": false, + "info": "", + "name": "model", + "options": [ + "text-embedding-3-small", + "text-embedding-3-large", + "text-embedding-ada-002" + ], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "text-embedding-3-small" + }, + "model_kwargs": { + "_input_type": "DictInput", + "advanced": true, + "display_name": "Model Kwargs", + "dynamic": false, + "info": "", + "list": false, + "name": "model_kwargs", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_input": true, + "type": "dict", + "value": {} + }, + "openai_api_base": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI API Base", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "name": "openai_api_base", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "openai_api_key": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "OpenAI API Key", + "dynamic": false, + "info": "", + "input_types": [], + "load_from_db": true, + "name": "openai_api_key", + "password": true, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "str", + "value": "OPENAI_API_KEY" + }, + "openai_api_type": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI API Type", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "name": "openai_api_type", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "openai_api_version": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI API Version", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "name": "openai_api_version", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "openai_organization": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI Organization", + 
"dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "name": "openai_organization", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "openai_proxy": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI Proxy", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "name": "openai_proxy", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "request_timeout": { + "_input_type": "FloatInput", + "advanced": true, + "display_name": "Request Timeout", + "dynamic": false, + "info": "", + "list": false, + "name": "request_timeout", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "float", + "value": "" + }, + "show_progress_bar": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Show Progress Bar", + "dynamic": false, + "info": "", + "list": false, + "name": "show_progress_bar", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "skip_empty": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Skip Empty", + "dynamic": false, + "info": "", + "list": false, + "name": "skip_empty", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "tiktoken_enable": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "TikToken Enable", + "dynamic": false, + "info": "If False, you must have transformers installed.", + "list": false, + "name": "tiktoken_enable", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "tiktoken_model_name": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "TikToken Model Name", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "name": "tiktoken_model_name", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + }, + "tool_mode": false + }, + "selected_output": "embeddings", + "type": "OpenAIEmbeddings" + }, + "dragging": false, + "height": 320, + "id": "OpenAIEmbeddings-joRJ6", + "measured": { + "height": 320, + "width": 320 + }, + "position": { + "x": 1704.8491676318172, + "y": 1879.144249471858 + }, + "positionAbsolute": { + "x": 1690.9220896443658, + "y": 1866.483269483266 + }, + "selected": false, + "type": "genericNode", + "width": 320 + }, + { + "data": { + "id": "note-Bm5Xw", + "node": { + "description": "### 💡 Add your OpenAI API key here 👇", + "display_name": "", + "documentation": "", + "template": { + "backgroundColor": "transparent" + } + }, + "type": "note" + }, + "dragging": false, + "height": 324, + "id": "note-Bm5Xw", + "measured": { + "height": 324, + "width": 324 + }, + "position": { + "x": 1692.2322233423606, + "y": 1821.9077961087607 + }, + "positionAbsolute": { + "x": 
1692.2322233423606, + "y": 1821.9077961087607 + }, + "selected": false, + "type": "noteNode", + "width": 324 + }, + { + "data": { + "id": "File-PSU37", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Loads content from files with optional advanced document processing and export using Docling.", + "display_name": "File", + "documentation": "https://docs.langflow.org/components-data#file", + "edited": true, + "field_order": [ + "path", + "file_path", + "separator", + "silent_errors", + "delete_server_file_after_processing", + "ignore_unsupported_extensions", + "ignore_unspecified_files", + "advanced_mode", + "pipeline", + "ocr_engine", + "md_image_placeholder", + "md_page_break_placeholder", + "doc_key", + "use_multithreading", + "concurrency_multithreading", + "markdown" + ], + "frozen": false, + "icon": "file-text", + "last_updated": "2025-09-09T02:18:48.064Z", + "legacy": false, + "lf_version": "1.5.0.post2", + "metadata": { + "code_hash": "086578fbbd54", + "dependencies": { + "dependencies": [ + { + "name": "langflow", + "version": "1.5.0.post2" + }, + { + "name": "anyio", + "version": "4.10.0" + } + ], + "total_dependencies": 2 + }, + "module": "custom_components.file" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Raw Content", + "group_outputs": false, + "hidden": null, + "method": "load_files_message", + "name": "message", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "File Path", + "group_outputs": false, + "hidden": null, + "method": "load_files_path", + "name": "path", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "advanced_mode": { + "_input_type": "BoolInput", + "advanced": false, + "display_name": "Advanced Parser", + "dynamic": false, + "info": "Enable advanced document processing and export with Docling for PDFs, images, and office documents. 
Available only for single file processing.", + "list": false, + "list_add_label": "Add More", + "name": "advanced_mode", + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "\"\"\"Enhanced file component with clearer structure and Docling isolation.\n\nNotes:\n-----\n- Functionality is preserved with minimal behavioral changes.\n- ALL Docling parsing/export runs in a separate OS process to prevent memory\n growth and native library state from impacting the main Langflow process.\n- Standard text/structured parsing continues to use existing BaseFileComponent\n utilities (and optional threading via `parallel_load_data`).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport subprocess\nimport sys\nimport textwrap\nfrom copy import deepcopy\nfrom typing import TYPE_CHECKING, Any\n\nfrom langflow.base.data.base_file import BaseFileComponent\nfrom langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n FileInput,\n IntInput,\n MessageTextInput,\n Output,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\nimport anyio\nfrom langflow.services.storage.utils import build_content_type_from_extension\nif TYPE_CHECKING:\n from langflow.schema import DataFrame\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"File component with optional Docling processing (isolated in a subprocess).\"\"\"\n\n display_name = \"File\"\n description = \"Loads content from files with optional advanced document processing and export using Docling.\"\n documentation: str = \"https://docs.langflow.org/components-data#file\"\n icon = \"file-text\"\n name = \"File\"\n\n # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.\n VALID_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"csv\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"docx\",\n \"htm\",\n \"html\",\n \"jpeg\",\n \"json\",\n \"md\",\n \"pdf\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"txt\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"xml\",\n \"webp\",\n *TEXT_FILE_TYPES,\n ]\n\n # Fixed export settings used when markdown export is requested.\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n # ---- Inputs / Outputs (kept as close to original as possible) -------------------\n _base_inputs = deepcopy(BaseFileComponent._base_inputs)\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n break\n\n inputs = [\n *_base_inputs,\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. 
\"\n \"Available only for single file processing.\"\n ),\n show=False,\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"\", \"easyocr\"],\n value=\"\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n # Deprecated input retained for backward-compatibility.\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n ]\n\n # ------------------------------ UI helpers --------------------------------------\n\n def _path_value(self, template: dict) -> list[str]:\n \"\"\"Return the list of currently selected file paths from the template.\"\"\"\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Show/hide Advanced Parser and related fields based on selection context.\"\"\"\n if field_name == \"path\":\n paths = self._path_value(build_config)\n file_path = paths[0] if paths else \"\"\n file_count = len(field_value) if field_value else 0\n\n # Advanced mode only for single (non-tabular) file\n allow_advanced = file_count == 1 and not file_path.endswith((\".csv\", \".xlsx\", \".parquet\"))\n build_config[\"advanced_mode\"][\"show\"] = allow_advanced\n if not allow_advanced:\n build_config[\"advanced_mode\"][\"value\"] = False\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = False\n\n elif field_name == \"advanced_mode\":\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = bool(field_value)\n\n return build_config\n\n def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on file count/type and advanced mode.\"\"\"\n if field_name not in [\"path\", 
\"advanced_mode\"]:\n return frontend_node\n\n template = frontend_node.get(\"template\", {})\n paths = self._path_value(template)\n if not paths:\n return frontend_node\n\n frontend_node[\"outputs\"] = []\n if len(paths) == 1:\n file_path = paths[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"dataframe\", method=\"load_files_structured\"),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\"),\n )\n\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n if advanced_mode:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Output\", name=\"advanced\", method=\"load_files_advanced\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Markdown\", name=\"markdown\", method=\"load_files_markdown\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n # Multiple files => DataFrame output; advanced parser disabled\n frontend_node[\"outputs\"].append(Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\"))\n\n return frontend_node\n\n # ------------------------------ Core processing ----------------------------------\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Lightweight extension gate for Docling-compatible types.\"\"\"\n docling_exts = (\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n )\n return file_path.lower().endswith(docling_exts)\n\n def _process_docling_in_subprocess(self, file_path: str) -> Data | None:\n \"\"\"Run Docling in a separate OS process and map the result to a Data object.\n\n We avoid multiprocessing pickling by launching `python -c \"