From 99bf3772b50131ddbdee9e8c09435bb900f0ff3b Mon Sep 17 00:00:00 2001
From: Brent O'Neill <brent.oneill@datastax.com>
Date: Fri, 19 Sep 2025 12:16:20 -0600
Subject: [PATCH 01/19] updated header for chunks page

---
 frontend/src/app/knowledge/chunks/page.tsx | 67 ++++++++++++++++------
 1 file changed, 50 insertions(+), 17 deletions(-)
diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 9385c474..254eb511 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -1,8 +1,10 @@
 "use client";
 
 import {
+  ArrowLeft,
   Building2,
   Cloud,
+  File as FileIcon,
   FileText,
   HardDrive,
   Loader2,
@@ -21,6 +23,9 @@ import {
   type File,
   useGetSearchQuery,
 } from "../../api/queries/useGetSearchQuery";
+import { Label } from "@/components/ui/label";
+import { Checkbox } from "@/components/ui/checkbox";
+import { Input } from "@/components/ui/input";
 
 // Function to get the appropriate icon for a connector type
 function getSourceIcon(connectorType?: string) {
@@ -47,9 +52,14 @@ function ChunksPageContent() {
   const filename = searchParams.get("filename");
   const [chunks, setChunks] = useState<ChunkResult[]>([]);
 
+  const [selectAll, setSelectAll] = useState(false);
+  const [queryInputText, setQueryInputText] = useState(
+    parsedFilterData?.query ?? ""
+  );
+
   // Use the same search query as the knowledge page, but we'll filter for the specific file
   const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
-
+  console.log({ data });
   // Extract chunks for the specific file
   useEffect(() => {
     if (!filename || !(data as File[]).length) {
@@ -98,30 +108,53 @@ function ChunksPageContent() {
     >
       <div className="flex-1 flex flex-col min-h-0 px-6 py-6">
         {/* Header */}
-        <div className="flex items-center justify-between mb-6">
-          <div className="flex items-center gap-3">
-            <Button
-              variant="ghost"
-              size="sm"
-              onClick={handleBack}
-              className="text-muted-foreground hover:text-foreground px-2"
-            >
-              ← Back
+        <div className="flex flex-col mb-6">
+          <div className="flex items-center gap-3 mb-2">
+            <Button variant="ghost" onClick={handleBack}>
+              <ArrowLeft size={18} />
+              <FileIcon className="text-muted-foreground" size={18} />
+              <h1 className="text-lg font-semibold">
+                {filename.replace(/\.[^/.]+$/, "")}
+              </h1>
             </Button>
-            <div className="flex flex-col">
-              <h2 className="text-lg font-semibold">Document Chunks</h2>
-              <p className="text-sm text-muted-foreground truncate max-w-md">
-                {decodeURIComponent(filename)}
-              </p>
+          </div>
+          <div className="flex items-center gap-3 pl-6 mt-2">
+            <div className="flex items-center gap-2">
+              <Checkbox
+                id="selectAllChunks"
+                checked={selectAll}
+                onCheckedChange={(checked) => setSelectAll(checked === true)}
+              />
+              <Label
+                htmlFor="selectAllChunks"
+                className="font-medium text-muted-foreground whitespace-nowrap"
+              >
+                Select all
+              </Label>
+            </div>
+            <div className="flex-1 flex items-center gap-2">
+              <Input
+                name="search-query"
+                id="search-query"
+                type="text"
+                defaultValue={parsedFilterData?.query}
+                value={queryInputText}
+                onChange={(e) => setQueryInputText(e.target.value)}
+                placeholder="Search chunks..."
+                className="flex-1 bg-muted/20 rounded-lg border border-border/50 px-4 py-3 focus-visible:ring-1 focus-visible:ring-ring"
+              />
+              <Button variant="outline" size="sm">
+                <Search />
+              </Button>
             </div>
           </div>
-          <div className="text-sm text-muted-foreground">
+          {/* <div className="text-sm text-muted-foreground">
             {!isFetching && chunks.length > 0 && (
               <span>
                 {chunks.length} chunk{chunks.length !== 1 ? "s" : ""} found
               </span>
             )}
-          </div>
+          </div> */}
         </div>
 
         {/* Content Area - matches knowledge page structure */}

From 9fdec36e9c5c6a7369738c53904a49ed358b6ed5 Mon Sep 17 00:00:00 2001
From: Brent O'Neill <brent.oneill@datastax.com>
Date: Mon, 22 Sep 2025 13:06:19 -0600
Subject: [PATCH 02/19] make the search filter work

---
 frontend/src/app/knowledge/chunks/page.tsx | 107 +++++++++++----------
 1 file changed, 56 insertions(+), 51 deletions(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 254eb511..450032a8 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -2,18 +2,13 @@
 
 import {
   ArrowLeft,
-  Building2,
-  Cloud,
+  Copy,
   File as FileIcon,
-  FileText,
-  HardDrive,
   Loader2,
   Search,
 } from "lucide-react";
 import { Suspense, useCallback, useEffect, useState } from "react";
 import { useRouter, useSearchParams } from "next/navigation";
-import { SiGoogledrive } from "react-icons/si";
-import { TbBrandOnedrive } from "react-icons/tb";
 import { ProtectedRoute } from "@/components/protected-route";
 import { Button } from "@/components/ui/button";
 import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
@@ -27,22 +22,6 @@ import { Label } from "@/components/ui/label";
 import { Checkbox } from "@/components/ui/checkbox";
 import { Input } from "@/components/ui/input";
 
-// Function to get the appropriate icon for a connector type
-function getSourceIcon(connectorType?: string) {
-  switch (connectorType) {
-    case "google_drive":
-      return <SiGoogledrive className="h-4 w-4 text-foreground" />;
-    case "onedrive":
-      return <TbBrandOnedrive className="h-4 w-4 text-foreground" />;
-    case "sharepoint":
-      return <Building2 className="h-4 w-4 text-foreground" />;
-    case "s3":
-      return <Cloud className="h-4 w-4 text-foreground" />;
-    default:
-      return <HardDrive className="h-4 w-4 text-muted-foreground" />;
-  }
-}
-
 function ChunksPageContent() {
   const router = useRouter();
   const searchParams = useSearchParams();
@@ -51,12 +30,32 @@ function ChunksPageContent() {
 
   const filename = searchParams.get("filename");
   const [chunks, setChunks] = useState<ChunkResult[]>([]);
+  const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
+    ChunkResult[]
+  >([]);
 
   const [selectAll, setSelectAll] = useState(false);
   const [queryInputText, setQueryInputText] = useState(
     parsedFilterData?.query ?? ""
   );
 
+  useEffect(() => {
+    if (queryInputText === "") {
+      setChunksFilteredByQuery(chunks);
+    } else {
+      // Filter the full list (not the previous result) so edits can widen it.
+      const needle = queryInputText.toLowerCase();
+      setChunksFilteredByQuery(
+        chunks.filter((chunk) => chunk.text.toLowerCase().includes(needle))
+      );
+    }
+  }, [queryInputText, chunks]);
+
+  const handleCopy = useCallback((text: string) => {
+    console.log("copying text to clipboard:", text);
+    navigator.clipboard.writeText(text);
+  }, []);
+
   // Use the same search query as the knowledge page, but we'll filter for the specific file
   const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
   console.log({ data });
@@ -118,7 +117,7 @@ function ChunksPageContent() {
               </h1>
             </Button>
           </div>
-          <div className="flex items-center gap-3 pl-6 mt-2">
+          <div className="flex items-center gap-3 pl-4 mt-2">
             <div className="flex items-center gap-2">
               <Checkbox
                 id="selectAllChunks"
@@ -148,13 +147,6 @@ function ChunksPageContent() {
               </Button>
             </div>
           </div>
-          {/* <div className="text-sm text-muted-foreground">
-            {!isFetching && chunks.length > 0 && (
-              <span>
-                {chunks.length} chunk{chunks.length !== 1 ? "s" : ""} found
-              </span>
-            )}
-          </div> */}
         </div>
 
         {/* Content Area - matches knowledge page structure */}
@@ -180,35 +172,48 @@ function ChunksPageContent() {
             </div>
           ) : (
             <div className="space-y-4 pb-6">
-              {chunks.map((chunk, index) => (
+              {chunksFilteredByQuery.map((chunk, index) => (
                 <div
                   key={chunk.filename + index}
-                  className="bg-muted/20 rounded-lg p-4 border border-border/50"
+                  className="bg-muted rounded-lg p-4 border border-border/50"
                 >
                   <div className="flex items-center justify-between mb-2">
-                    <div className="flex items-center gap-2">
-                      <FileText className="h-4 w-4 text-blue-400" />
-                      <span className="font-medium truncate">
-                        {chunk.filename}
+                    <div className="flex items-center gap-3">
+                      <div>
+                        <Checkbox />
+                      </div>
+                      <span className="text-sm text-bold">
+                        Chunk {chunk.page}
                       </span>
-                      {chunk.connector_type && (
-                        <div className="ml-2">
-                          {getSourceIcon(chunk.connector_type)}
-                        </div>
-                      )}
+                      <span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
+                        {chunk.text.length} chars
+                      </span>
+                      <div className="py-1">
+                        <Button
+                          className="p-1"
+                          onClick={() => handleCopy(chunk.text)}
+                          variant="ghost"
+                          size="xs"
+                        >
+                          <Copy className="text-muted-foreground" />
+                        </Button>
+                      </div>
                     </div>
-                    <span className="text-xs text-green-400 bg-green-400/20 px-2 py-1 rounded">
-                      {chunk.score.toFixed(2)}
-                    </span>
+
+                    {/* TODO: Update to use active toggle */}
+                    {/* <span className="px-2 py-1 text-green-500">
+                      <Switch
+                        className="ml-2 bg-green-500"
+                        checked={true}
+                      />
+                      Active
+                    </span> */}
                   </div>
-                  <div className="flex items-center gap-4 text-sm text-muted-foreground mb-3">
-                    <span>{chunk.mimetype}</span>
-                    <span>Page {chunk.page}</span>
-                    {chunk.owner_name && <span>Owner: {chunk.owner_name}</span>}
+                  <div>
+                    <blockquote className="text-sm text-muted-foreground leading-relaxed">
+                      {chunk.text}
+                    </blockquote>
                   </div>
-                  <p className="text-sm text-foreground/90 leading-relaxed">
-                    {chunk.text}
-                  </p>
                 </div>
               ))}
             </div>

From fc5f67e244f02f18523e95824bb7be13d006cdd9 Mon Sep 17 00:00:00 2001
From: Brent O'Neill <brent.oneill@datastax.com>
Date: Mon, 22 Sep 2025 14:23:03 -0600
Subject: [PATCH 03/19] Added technical details section

---
 frontend/src/app/knowledge/chunks/page.tsx | 112 +++++++++++++++++++--
 1 file changed, 101 insertions(+), 11 deletions(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 450032a8..0a5a00a3 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -7,7 +7,7 @@ import {
   Loader2,
   Search,
 } from "lucide-react";
-import { Suspense, useCallback, useEffect, useState } from "react";
+import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
 import { useRouter, useSearchParams } from "next/navigation";
 import { ProtectedRoute } from "@/components/protected-route";
 import { Button } from "@/components/ui/button";
@@ -22,6 +22,12 @@ import { Label } from "@/components/ui/label";
 import { Checkbox } from "@/components/ui/checkbox";
 import { Input } from "@/components/ui/input";
 
+// Maps a MIME type to a display label; unrecognized types yield "Unknown".
+const getFileTypeLabel = (mimetype: string): string => {
+  if (mimetype === "application/pdf") return "PDF";
+  if (mimetype === "text/plain") return "Text";
+  return mimetype === "application/msword" ? "Word Document" : "Unknown";
+};
 function ChunksPageContent() {
   const router = useRouter();
   const searchParams = useSearchParams();
@@ -33,12 +39,21 @@ function ChunksPageContent() {
   const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
     ChunkResult[]
   >([]);
+  const averageChunkLength = useMemo(
+    () =>
+      chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
+        chunks.length || 0,
+    [chunks]
+  );
 
   const [selectAll, setSelectAll] = useState(false);
   const [queryInputText, setQueryInputText] = useState(
     parsedFilterData?.query ?? ""
   );
 
+  // Use the same search query as the knowledge page, but we'll filter for the specific file
+  const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
+
   useEffect(() => {
     if (queryInputText === "") {
       setChunksFilteredByQuery(chunks);
@@ -52,13 +67,14 @@ function ChunksPageContent() {
   }, [queryInputText, chunks]);
 
   const handleCopy = useCallback((text: string) => {
-    console.log("copying text to clipboard:", text);
     navigator.clipboard.writeText(text);
   }, []);
 
-  // Use the same search query as the knowledge page, but we'll filter for the specific file
-  const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
-  console.log({ data });
+  const fileData = (data as File[]).find(
+    (file: File) => file.filename === filename
+  );
+
+  console.log({ fileData });
   // Extract chunks for the specific file
   useEffect(() => {
     if (!filename || !(data as File[]).length) {
@@ -66,11 +82,8 @@ function ChunksPageContent() {
       return;
     }
 
-    const fileData = (data as File[]).find(
-      (file: File) => file.filename === filename
-    );
     setChunks(fileData?.chunks || []);
-  }, [data, filename]);
+  }, [data, filename, fileData?.chunks]);
 
   const handleBack = useCallback(() => {
     router.back();
@@ -90,9 +103,11 @@ function ChunksPageContent() {
     );
   }
 
+  console.log({ data });
+
   return (
     <div
-      className={`fixed inset-0 md:left-72 top-[53px] flex flex-col transition-all duration-300 ${
+      className={`fixed inset-0 md:left-72 top-[53px] flex flex-row transition-all duration-300 ${
         isMenuOpen && isPanelOpen
           ? "md:right-[704px]"
           : // Both open: 384px (menu) + 320px (KF panel)
@@ -210,7 +225,7 @@ function ChunksPageContent() {
                     </span> */}
                   </div>
                   <div>
-                    <blockquote className="text-sm text-muted-foreground leading-relaxed">
+                    <blockquote className="text-sm text-muted-foreground leading-relaxed border-l-2 border-color-input ml-1.5 pl-4">
                       {chunk.text}
                     </blockquote>
                   </div>
@@ -220,6 +235,81 @@ function ChunksPageContent() {
           )}
         </div>
       </div>
+      {/* Right panel - Summary (TODO), Technical details,  */}
+      <div className="w-[320px] py-20 px-2">
+        <div className="mb-8">
+          <h2 className="text-xl font-semibold mt-3 mb-4">Technical details</h2>
+          <dl>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Total chunks</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                {chunks.length}
+              </dd>
+            </div>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Avg length</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                {averageChunkLength.toFixed(0)} chars
+              </dd>
+            </div>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Process time</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                {/* {averageChunkLength.toFixed(0)} chars */}
+              </dd>
+            </div>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Model</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                {/* {averageChunkLength.toFixed(0)} chars */}
+              </dd>
+            </div>
+          </dl>
+        </div>
+        <div className="mb-8">
+          <h2 className="text-xl font-semibold mt-2 mb-3">Original document</h2>
+          <dl>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Name</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                {fileData?.filename}
+              </dd>
+            </div>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Type</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
+              </dd>
+            </div>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Size</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                {fileData?.size
+                  ? `${Math.round(fileData.size / 1024)} KB`
+                  : "Unknown"}
+              </dd>
+            </div>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                {fileData?.uploaded || "Unknown"}
+              </dd>
+            </div>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Source</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                {/* {fileData?.uploaded || "Unknown"} */}
+              </dd>
+            </div>
+            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+              <dt className="text-sm/6 text-muted-foreground">Updated</dt>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
+                N/A
+              </dd>
+            </div>
+          </dl>
+        </div>
+      </div>
     </div>
   );
 }

From 13f75411900eedb0dee46a960f72c6271ec0f102 Mon Sep 17 00:00:00 2001
From: Brent O'Neill <brent.oneill@datastax.com>
Date: Mon, 22 Sep 2025 14:23:50 -0600
Subject: [PATCH 04/19] finish stubbing data

---
 frontend/src/app/knowledge/chunks/page.tsx | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 0a5a00a3..e9b66187 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -292,14 +292,12 @@ function ChunksPageContent() {
             <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
-                {fileData?.uploaded || "Unknown"}
+                N/A
               </dd>
             </div>
             <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Source</dt>
-              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
-                {/* {fileData?.uploaded || "Unknown"} */}
-              </dd>
+              <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
             </div>
             <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Updated</dt>

From 8a17cccf3d7fc87e706bfb26ebaa9c7aecda3bd4 Mon Sep 17 00:00:00 2001
From: Brent O'Neill <brent.oneill@datastax.com>
Date: Mon, 22 Sep 2025 14:29:13 -0600
Subject: [PATCH 05/19] remove console.logs

---
 frontend/src/app/knowledge/chunks/page.tsx | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index e9b66187..b59a8760 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -74,7 +74,6 @@ function ChunksPageContent() {
     (file: File) => file.filename === filename
   );
 
-  console.log({ fileData });
   // Extract chunks for the specific file
   useEffect(() => {
     if (!filename || !(data as File[]).length) {
@@ -103,8 +102,6 @@ function ChunksPageContent() {
     );
   }
 
-  console.log({ data });
-
   return (
     <div
       className={`fixed inset-0 md:left-72 top-[53px] flex flex-row transition-all duration-300 ${

From e67fc21a9bcc46931ff97527eb90efa6feb0e1ec Mon Sep 17 00:00:00 2001
From: Mike Fortman <michael.fortman@datastax.com>
Date: Mon, 22 Sep 2025 16:58:12 -0500
Subject: [PATCH 06/19] update flow based on docling presets

---
 src/api/settings.py    | 99 ++++++++++++++++++++++++++++++++++++++++++
 src/config/settings.py |  3 ++
 src/main.py            | 13 +++++-
 3 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/src/api/settings.py b/src/api/settings.py
index c169b263..9723cdeb 100644
--- a/src/api/settings.py
+++ b/src/api/settings.py
@@ -7,6 +7,7 @@ from config.settings import (
     LANGFLOW_CHAT_FLOW_ID,
     LANGFLOW_INGEST_FLOW_ID,
     LANGFLOW_PUBLIC_URL,
+    DOCLING_COMPONENT_ID,
     clients,
     get_openrag_config,
     config_manager,
@@ -234,6 +235,15 @@ async def update_settings(request, session_manager):
             current_config.knowledge.doclingPresets = body["doclingPresets"]
             config_updated = True
 
+            # Also update the flow with the new docling preset
+            try:
+                await _update_flow_docling_preset(body["doclingPresets"], preset_configs[body["doclingPresets"]])
+                logger.info(f"Successfully updated docling preset in flow to '{body['doclingPresets']}'")
+            except Exception as e:
+                logger.error(f"Failed to update docling preset in flow: {str(e)}")
+                # Don't fail the entire settings update if flow update fails
+                # The config will still be saved
+
         if "chunk_size" in body:
             if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
                 return JSONResponse(
@@ -527,3 +537,92 @@ async def onboarding(request, flows_service):
             {"error": f"Failed to update onboarding settings: {str(e)}"},
             status_code=500,
         )
+
+
+async def _update_flow_docling_preset(preset: str, preset_config: dict) -> None:
+    """Store preset_config as the docling_serve_opts value of the ingest flow's Docling node, then PATCH the flow (preset is not read here)."""
+    if not LANGFLOW_INGEST_FLOW_ID:
+        raise ValueError("LANGFLOW_INGEST_FLOW_ID is not configured")
+
+    # Get the current flow data from Langflow; anything other than HTTP 200 is an error
+    response = await clients.langflow_request(
+        "GET", f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}"
+    )
+
+    if response.status_code != 200:
+        raise Exception(f"Failed to get ingest flow: HTTP {response.status_code} - {response.text}")
+
+    flow_data = response.json()
+
+    # Find the target node: the first one whose id equals DOCLING_COMPONENT_ID (imported from config.settings)
+    nodes = flow_data.get("data", {}).get("nodes", [])
+    target_node = None
+    target_node_index = None
+
+    for i, node in enumerate(nodes):
+        if node.get("id") == DOCLING_COMPONENT_ID:
+            target_node = node
+            target_node_index = i
+            break
+
+    if target_node is None:
+        raise Exception(f"Docling component '{DOCLING_COMPONENT_ID}' not found in ingest flow")
+
+    # Update the docling_serve_opts value in place; a missing or falsy field is reported as an error
+    if (target_node.get("data", {}).get("node", {}).get("template", {}).get("docling_serve_opts")):
+        flow_data["data"]["nodes"][target_node_index]["data"]["node"]["template"]["docling_serve_opts"]["value"] = preset_config
+    else:
+        raise Exception(f"docling_serve_opts field not found in node '{DOCLING_COMPONENT_ID}'")
+
+    # Send the whole modified flow back via PATCH request
+    patch_response = await clients.langflow_request(
+        "PATCH", f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}", json=flow_data
+    )
+
+    if patch_response.status_code != 200:
+        raise Exception(f"Failed to update ingest flow: HTTP {patch_response.status_code} - {patch_response.text}")
+
+
+async def update_docling_preset(request, session_manager):
+    """Update docling preset in the ingest flow.
+
+    Expects a JSON object body whose "preset" is a key of get_docling_preset_configs();
+    responds 400 if it is missing or unknown and 500 if any step raises.
+    """
+    try:
+        # Parse request body
+        body = await request.json()
+
+        # A body that is not a JSON object cannot carry a preset: client error, not a 500
+        if not isinstance(body, dict) or "preset" not in body:
+            return JSONResponse({"error": "preset parameter is required"}, status_code=400)
+
+        preset = body["preset"]
+        preset_configs = get_docling_preset_configs()
+
+        # Check the type first: an unhashable preset (list/dict) would raise on the `in` lookup
+        if not isinstance(preset, str) or preset not in preset_configs:
+            valid_presets = list(preset_configs.keys())
+            return JSONResponse(
+                {"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"},
+                status_code=400
+            )
+
+        # Look up the preset configuration and let the helper write it into the flow
+        preset_config = preset_configs[preset]
+        await _update_flow_docling_preset(preset, preset_config)
+
+        logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
+
+        return JSONResponse({
+            "message": f"Successfully updated docling preset to '{preset}'",
+            "preset": preset,
+            "preset_config": preset_config
+        })
+
+    except Exception as e:
+        logger.error("Failed to update docling preset", error=str(e))
+        return JSONResponse(
+            {"error": f"Failed to update docling preset: {str(e)}"},
+            status_code=500
+        )
diff --git a/src/config/settings.py b/src/config/settings.py
index 66f78ce5..11e4b835 100644
--- a/src/config/settings.py
+++ b/src/config/settings.py
@@ -528,6 +528,9 @@ OLLAMA_EMBEDDING_COMPONENT_ID = os.getenv(
 )
 OLLAMA_LLM_COMPONENT_ID = os.getenv("OLLAMA_LLM_COMPONENT_ID", "OllamaModel-eCsJx")
 
+# Docling component ID for ingest flow
+DOCLING_COMPONENT_ID = os.getenv("DOCLING_COMPONENT_ID", "DoclingRemote-78KoX")
+
 # Global clients instance
 clients = AppClients()
 
diff --git a/src/main.py b/src/main.py
index e7cca718..f78e07bc 100644
--- a/src/main.py
+++ b/src/main.py
@@ -971,12 +971,23 @@ async def create_app():
             "/onboarding",
             require_auth(services["session_manager"])(
                 partial(
-                    settings.onboarding, 
+                    settings.onboarding,
                     flows_service=services["flows_service"]
                 )
             ),
             methods=["POST"],
         ),
+        # Docling preset update endpoint
+        Route(
+            "/settings/docling-preset",
+            require_auth(services["session_manager"])(
+                partial(
+                    settings.update_docling_preset,
+                    session_manager=services["session_manager"]
+                )
+            ),
+            methods=["PATCH"],
+        ),
         Route(
             "/nudges",
             require_auth(services["session_manager"])(

From f861f952201a8d09b75dfd4af5d3a7978aa229a1 Mon Sep 17 00:00:00 2001
From: Mike Fortman <michael.fortman@datastax.com>
Date: Mon, 22 Sep 2025 17:05:46 -0500
Subject: [PATCH 07/19] add docling ingest flow

---
 .env.example                      |    2 +
 flows/openrag_ingest_docling.json | 2220 +++++++++++++++++++++++++++++
 2 files changed, 2222 insertions(+)
 create mode 100644 flows/openrag_ingest_docling.json

diff --git a/.env.example b/.env.example
index 45b7676b..fe908795 100644
--- a/.env.example
+++ b/.env.example
@@ -8,6 +8,8 @@ LANGFLOW_SECRET_KEY=
 # flow ids for chat and ingestion flows
 LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
 LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
+# Ingest flow using docling. NOTE: this sets LANGFLOW_INGEST_FLOW_ID a second time; keep only the ID you intend to use.
+LANGFLOW_INGEST_FLOW_ID=1402618b-e6d1-4ff2-9a11-d6ce71186915
 NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c
 
 # Set a strong admin password for OpenSearch; a bcrypt hash is generated at
diff --git a/flows/openrag_ingest_docling.json b/flows/openrag_ingest_docling.json
new file mode 100644
index 00000000..cd6d7d39
--- /dev/null
+++ b/flows/openrag_ingest_docling.json
@@ -0,0 +1,2220 @@
+{
+  "data": {
+    "edges": [
+      {
+        "animated": false,
+        "className": "",
+        "data": {
+          "sourceHandle": {
+            "dataType": "SplitText",
+            "id": "SplitText-3ZI5B",
+            "name": "dataframe",
+            "output_types": [
+              "DataFrame"
+            ]
+          },
+          "targetHandle": {
+            "fieldName": "ingest_data",
+            "id": "OpenSearchHybrid-XtKoA",
+            "inputTypes": [
+              "Data",
+              "DataFrame"
+            ],
+            "type": "other"
+          }
+        },
+        "id": "reactflow__edge-SplitText-3ZI5B{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-3ZI5Bœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-OpenSearchHybrid-XtKoA{œfieldNameœ:œingest_dataœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}",
+        "selected": false,
+        "source": "SplitText-3ZI5B",
+        "sourceHandle": "{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-3ZI5Bœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}",
+        "target": "OpenSearchHybrid-XtKoA",
+        "targetHandle": "{œfieldNameœ:œingest_dataœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}"
+      },
+      {
+        "animated": false,
+        "className": "",
+        "data": {
+          "sourceHandle": {
+            "dataType": "OpenAIEmbeddings",
+            "id": "OpenAIEmbeddings-mP45L",
+            "name": "embeddings",
+            "output_types": [
+              "Embeddings"
+            ]
+          },
+          "targetHandle": {
+            "fieldName": "embedding",
+            "id": "OpenSearchHybrid-XtKoA",
+            "inputTypes": [
+              "Embeddings"
+            ],
+            "type": "other"
+          }
+        },
+        "id": "reactflow__edge-OpenAIEmbeddings-mP45L{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-mP45Lœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-OpenSearchHybrid-XtKoA{œfieldNameœ:œembeddingœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}",
+        "selected": false,
+        "source": "OpenAIEmbeddings-mP45L",
+        "sourceHandle": "{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-mP45Lœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}",
+        "target": "OpenSearchHybrid-XtKoA",
+        "targetHandle": "{œfieldNameœ:œembeddingœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}"
+      },
+      {
+        "animated": false,
+        "className": "",
+        "data": {
+          "sourceHandle": {
+            "dataType": "DoclingRemote",
+            "id": "DoclingRemote-78KoX",
+            "name": "dataframe",
+            "output_types": [
+              "DataFrame"
+            ]
+          },
+          "targetHandle": {
+            "fieldName": "data_inputs",
+            "id": "ExportDoclingDocument-xFoCI",
+            "inputTypes": [
+              "Data",
+              "DataFrame"
+            ],
+            "type": "other"
+          }
+        },
+        "id": "xy-edge__DoclingRemote-78KoX{œdataTypeœ:œDoclingRemoteœ,œidœ:œDoclingRemote-78KoXœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-ExportDoclingDocument-xFoCI{œfieldNameœ:œdata_inputsœ,œidœ:œExportDoclingDocument-xFoCIœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}",
+        "selected": false,
+        "source": "DoclingRemote-78KoX",
+        "sourceHandle": "{œdataTypeœ:œDoclingRemoteœ,œidœ:œDoclingRemote-78KoXœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}",
+        "target": "ExportDoclingDocument-xFoCI",
+        "targetHandle": "{œfieldNameœ:œdata_inputsœ,œidœ:œExportDoclingDocument-xFoCIœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}"
+      },
+      {
+        "animated": false,
+        "className": "",
+        "data": {
+          "sourceHandle": {
+            "dataType": "ExportDoclingDocument",
+            "id": "ExportDoclingDocument-xFoCI",
+            "name": "data",
+            "output_types": [
+              "Data"
+            ]
+          },
+          "targetHandle": {
+            "fieldName": "data_inputs",
+            "id": "SplitText-3ZI5B",
+            "inputTypes": [
+              "Data",
+              "DataFrame",
+              "Message"
+            ],
+            "type": "other"
+          }
+        },
+        "id": "xy-edge__ExportDoclingDocument-xFoCI{œdataTypeœ:œExportDoclingDocumentœ,œidœ:œExportDoclingDocument-xFoCIœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-3ZI5B{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-3ZI5Bœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
+        "selected": false,
+        "source": "ExportDoclingDocument-xFoCI",
+        "sourceHandle": "{œdataTypeœ:œExportDoclingDocumentœ,œidœ:œExportDoclingDocument-xFoCIœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}",
+        "target": "SplitText-3ZI5B",
+        "targetHandle": "{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-3ZI5Bœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}"
+      }
+    ],
+    "nodes": [
+      {
+        "data": {
+          "description": "Split text into chunks based on specified criteria.",
+          "display_name": "Split Text",
+          "id": "SplitText-3ZI5B",
+          "node": {
+            "base_classes": [
+              "DataFrame"
+            ],
+            "beta": false,
+            "conditional_paths": [],
+            "custom_fields": {},
+            "description": "Split text into chunks based on specified criteria.",
+            "display_name": "Split Text",
+            "documentation": "https://docs.langflow.org/components-processing#split-text",
+            "edited": true,
+            "field_order": [
+              "data_inputs",
+              "chunk_overlap",
+              "chunk_size",
+              "separator",
+              "text_key",
+              "keep_separator"
+            ],
+            "frozen": false,
+            "icon": "scissors-line-dashed",
+            "legacy": false,
+            "lf_version": "1.6.0",
+            "metadata": {
+              "code_hash": "65a90e1f4fe6",
+              "dependencies": {
+                "dependencies": [
+                  {
+                    "name": "langchain_text_splitters",
+                    "version": "0.3.9"
+                  },
+                  {
+                    "name": "langflow",
+                    "version": "1.5.0.post2"
+                  }
+                ],
+                "total_dependencies": 2
+              },
+              "module": "custom_components.split_text"
+            },
+            "minimized": false,
+            "output_types": [],
+            "outputs": [
+              {
+                "allows_loop": false,
+                "cache": true,
+                "display_name": "Chunks",
+                "group_outputs": false,
+                "hidden": null,
+                "method": "split_text",
+                "name": "dataframe",
+                "options": null,
+                "required_inputs": null,
+                "selected": "DataFrame",
+                "tool_mode": true,
+                "types": [
+                  "DataFrame"
+                ],
+                "value": "__UNDEFINED__"
+              }
+            ],
+            "pinned": false,
+            "template": {
+              "_type": "Component",
+              "chunk_overlap": {
+                "_input_type": "IntInput",
+                "advanced": false,
+                "display_name": "Chunk Overlap",
+                "dynamic": false,
+                "info": "Number of characters to overlap between chunks.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "chunk_overlap",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": 200
+              },
+              "chunk_size": {
+                "_input_type": "IntInput",
+                "advanced": false,
+                "display_name": "Chunk Size",
+                "dynamic": false,
+                "info": "The maximum length of each chunk. Text is first split by separator, then chunks are merged up to this size. Individual splits larger than this won't be further divided.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "chunk_size",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": 1000
+              },
+              "code": {
+                "advanced": true,
+                "dynamic": true,
+                "fileTypes": [],
+                "file_path": "",
+                "info": "",
+                "list": false,
+                "load_from_db": false,
+                "multiline": true,
+                "name": "code",
+                "password": false,
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "type": "code",
+                "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n    display_name: str = \"Split Text\"\n    description: str = \"Split text into chunks based on specified criteria.\"\n    documentation: str = \"https://docs.langflow.org/components-processing#split-text\"\n    icon = \"scissors-line-dashed\"\n    name = \"SplitText\"\n\n    inputs = [\n        HandleInput(\n            name=\"data_inputs\",\n            display_name=\"Input\",\n            info=\"The data with texts to split in chunks.\",\n            input_types=[\"Data\", \"DataFrame\", \"Message\"],\n            required=True,\n        ),\n        IntInput(\n            name=\"chunk_overlap\",\n            display_name=\"Chunk Overlap\",\n            info=\"Number of characters to overlap between chunks.\",\n            value=200,\n        ),\n        IntInput(\n            name=\"chunk_size\",\n            display_name=\"Chunk Size\",\n            info=(\n                \"The maximum length of each chunk. Text is first split by separator, \"\n                \"then chunks are merged up to this size. \"\n                \"Individual splits larger than this won't be further divided.\"\n            ),\n            value=1000,\n        ),\n        MessageTextInput(\n            name=\"separator\",\n            display_name=\"Separator\",\n            info=(\n                \"The character to split on. Use \\\\n for newline. \"\n                \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . 
for sentences\"\n            ),\n            value=\"\\n\",\n        ),\n        MessageTextInput(\n            name=\"text_key\",\n            display_name=\"Text Key\",\n            info=\"The key to use for the text column.\",\n            value=\"text\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"keep_separator\",\n            display_name=\"Keep Separator\",\n            info=\"Whether to keep the separator in the output chunks and where to place it.\",\n            options=[\"False\", \"True\", \"Start\", \"End\"],\n            value=\"False\",\n            advanced=True,\n        ),\n    ]\n\n    outputs = [\n        Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n    ]\n\n    def _docs_to_data(self, docs) -> list[Data]:\n        data_list = [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n        return data_list\n\n    def _fix_separator(self, separator: str) -> str:\n        \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n        if separator == \"/n\":\n            return \"\\n\"\n        if separator == \"/t\":\n            return \"\\t\"\n        return separator\n\n    def split_text_base(self):\n        separator = self._fix_separator(self.separator)\n        separator = unescape_string(separator)\n\n        if isinstance(self.data_inputs, DataFrame):\n            if not len(self.data_inputs):\n                msg = \"DataFrame is empty\"\n                raise TypeError(msg)\n\n            self.data_inputs.text_key = self.text_key\n            try:\n                documents = self.data_inputs.to_lc_documents()\n            except Exception as e:\n                msg = f\"Error converting DataFrame to documents: {e}\"\n                raise TypeError(msg) from e\n        elif isinstance(self.data_inputs, Message):\n            self.data_inputs = [self.data_inputs.to_data()]\n            return self.split_text_base()\n        else:\n          
  if not self.data_inputs:\n                msg = \"No data inputs provided\"\n                raise TypeError(msg)\n\n            documents = []\n            if isinstance(self.data_inputs, Data):\n                self.data_inputs.text_key = self.text_key\n                documents = [self.data_inputs.to_lc_document()]\n            else:\n                try:\n                    documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n                    if not documents:\n                        msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n                        raise TypeError(msg)\n                except AttributeError as e:\n                    msg = f\"Invalid input type in collection: {e}\"\n                    raise TypeError(msg) from e\n        try:\n            # Convert string 'False'/'True' to boolean\n            keep_sep = self.keep_separator\n            if isinstance(keep_sep, str):\n                if keep_sep.lower() == \"false\":\n                    keep_sep = False\n                elif keep_sep.lower() == \"true\":\n                    keep_sep = True\n                # 'start' and 'end' are kept as strings\n            self.log(documents)\n            splitter = CharacterTextSplitter(\n                chunk_overlap=self.chunk_overlap,\n                chunk_size=self.chunk_size,\n                separator=separator,\n                keep_separator=keep_sep,\n            )\n            return splitter.split_documents(documents)\n        except Exception as e:\n            msg = f\"Error splitting text: {e}\"\n            raise TypeError(msg) from e\n\n    def split_text(self) -> DataFrame:\n        return DataFrame(self._docs_to_data(self.split_text_base()))\n"
+              },
+              "data_inputs": {
+                "_input_type": "HandleInput",
+                "advanced": false,
+                "display_name": "Input",
+                "dynamic": false,
+                "info": "The data with texts to split in chunks.",
+                "input_types": [
+                  "Data",
+                  "DataFrame",
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "data_inputs",
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "trace_as_metadata": true,
+                "type": "other",
+                "value": ""
+              },
+              "keep_separator": {
+                "_input_type": "DropdownInput",
+                "advanced": true,
+                "combobox": false,
+                "dialog_inputs": {},
+                "display_name": "Keep Separator",
+                "dynamic": false,
+                "info": "Whether to keep the separator in the output chunks and where to place it.",
+                "name": "keep_separator",
+                "options": [
+                  "False",
+                  "True",
+                  "Start",
+                  "End"
+                ],
+                "options_metadata": [],
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "toggle": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "False"
+              },
+              "separator": {
+                "_input_type": "MessageTextInput",
+                "advanced": false,
+                "display_name": "Separator",
+                "dynamic": false,
+                "info": "The character to split on. Use \\n for newline. Examples: \\n\\n for paragraphs, \\n for lines, . for sentences",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "separator",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "\n"
+              },
+              "text_key": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "Text Key",
+                "dynamic": false,
+                "info": "The key to use for the text column.",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "text_key",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "text"
+              }
+            },
+            "tool_mode": false
+          },
+          "selected_output": "chunks",
+          "type": "SplitText"
+        },
+        "dragging": false,
+        "height": 475,
+        "id": "SplitText-3ZI5B",
+        "measured": {
+          "height": 475,
+          "width": 320
+        },
+        "position": {
+          "x": 1729.1788373023007,
+          "y": 1330.8003441546418
+        },
+        "positionAbsolute": {
+          "x": 1683.4543896546102,
+          "y": 1350.7871623588553
+        },
+        "selected": false,
+        "type": "genericNode",
+        "width": 320
+      },
+      {
+        "data": {
+          "id": "OpenAIEmbeddings-mP45L",
+          "node": {
+            "base_classes": [
+              "Embeddings"
+            ],
+            "beta": false,
+            "conditional_paths": [],
+            "custom_fields": {},
+            "description": "Generate embeddings using OpenAI models.",
+            "display_name": "OpenAI Embeddings",
+            "documentation": "",
+            "edited": false,
+            "field_order": [
+              "default_headers",
+              "default_query",
+              "chunk_size",
+              "client",
+              "deployment",
+              "embedding_ctx_length",
+              "max_retries",
+              "model",
+              "model_kwargs",
+              "openai_api_key",
+              "openai_api_base",
+              "openai_api_type",
+              "openai_api_version",
+              "openai_organization",
+              "openai_proxy",
+              "request_timeout",
+              "show_progress_bar",
+              "skip_empty",
+              "tiktoken_model_name",
+              "tiktoken_enable",
+              "dimensions"
+            ],
+            "frozen": false,
+            "icon": "OpenAI",
+            "legacy": false,
+            "metadata": {
+              "code_hash": "8a658ed6d4c9",
+              "dependencies": {
+                "dependencies": [
+                  {
+                    "name": "langchain_openai",
+                    "version": "0.3.23"
+                  },
+                  {
+                    "name": "lfx",
+                    "version": null
+                  }
+                ],
+                "total_dependencies": 2
+              },
+              "module": "custom_components.openai_embeddings"
+            },
+            "minimized": false,
+            "output_types": [],
+            "outputs": [
+              {
+                "allows_loop": false,
+                "cache": true,
+                "display_name": "Embedding Model",
+                "group_outputs": false,
+                "method": "build_embeddings",
+                "name": "embeddings",
+                "options": null,
+                "required_inputs": null,
+                "selected": "Embeddings",
+                "tool_mode": true,
+                "types": [
+                  "Embeddings"
+                ],
+                "value": "__UNDEFINED__"
+              }
+            ],
+            "pinned": false,
+            "template": {
+              "_type": "Component",
+              "chunk_size": {
+                "_input_type": "IntInput",
+                "advanced": true,
+                "display_name": "Chunk Size",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "chunk_size",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": 1000
+              },
+              "client": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "Client",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "client",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "code": {
+                "advanced": true,
+                "dynamic": true,
+                "fileTypes": [],
+                "file_path": "",
+                "info": "",
+                "list": false,
+                "load_from_db": false,
+                "multiline": true,
+                "name": "code",
+                "password": false,
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "type": "code",
+                "value": "from langchain_openai import OpenAIEmbeddings\n\nfrom lfx.base.embeddings.model import LCEmbeddingsModel\nfrom lfx.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom lfx.field_typing import Embeddings\nfrom lfx.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput\n\n\nclass OpenAIEmbeddingsComponent(LCEmbeddingsModel):\n    display_name = \"OpenAI Embeddings\"\n    description = \"Generate embeddings using OpenAI models.\"\n    icon = \"OpenAI\"\n    name = \"OpenAIEmbeddings\"\n\n    inputs = [\n        DictInput(\n            name=\"default_headers\",\n            display_name=\"Default Headers\",\n            advanced=True,\n            info=\"Default headers to use for the API request.\",\n        ),\n        DictInput(\n            name=\"default_query\",\n            display_name=\"Default Query\",\n            advanced=True,\n            info=\"Default query parameters to use for the API request.\",\n        ),\n        IntInput(name=\"chunk_size\", display_name=\"Chunk Size\", advanced=True, value=1000),\n        MessageTextInput(name=\"client\", display_name=\"Client\", advanced=True),\n        MessageTextInput(name=\"deployment\", display_name=\"Deployment\", advanced=True),\n        IntInput(name=\"embedding_ctx_length\", display_name=\"Embedding Context Length\", advanced=True, value=1536),\n        IntInput(name=\"max_retries\", display_name=\"Max Retries\", value=3, advanced=True),\n        DropdownInput(\n            name=\"model\",\n            display_name=\"Model\",\n            advanced=False,\n            options=OPENAI_EMBEDDING_MODEL_NAMES,\n            value=\"text-embedding-3-small\",\n        ),\n        DictInput(name=\"model_kwargs\", display_name=\"Model Kwargs\", advanced=True),\n        SecretStrInput(name=\"openai_api_key\", display_name=\"OpenAI API Key\", value=\"OPENAI_API_KEY\", required=True),\n        
MessageTextInput(name=\"openai_api_base\", display_name=\"OpenAI API Base\", advanced=True),\n        MessageTextInput(name=\"openai_api_type\", display_name=\"OpenAI API Type\", advanced=True),\n        MessageTextInput(name=\"openai_api_version\", display_name=\"OpenAI API Version\", advanced=True),\n        MessageTextInput(\n            name=\"openai_organization\",\n            display_name=\"OpenAI Organization\",\n            advanced=True,\n        ),\n        MessageTextInput(name=\"openai_proxy\", display_name=\"OpenAI Proxy\", advanced=True),\n        FloatInput(name=\"request_timeout\", display_name=\"Request Timeout\", advanced=True),\n        BoolInput(name=\"show_progress_bar\", display_name=\"Show Progress Bar\", advanced=True),\n        BoolInput(name=\"skip_empty\", display_name=\"Skip Empty\", advanced=True),\n        MessageTextInput(\n            name=\"tiktoken_model_name\",\n            display_name=\"TikToken Model Name\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"tiktoken_enable\",\n            display_name=\"TikToken Enable\",\n            advanced=True,\n            value=True,\n            info=\"If False, you must have transformers installed.\",\n        ),\n        IntInput(\n            name=\"dimensions\",\n            display_name=\"Dimensions\",\n            info=\"The number of dimensions the resulting output embeddings should have. 
\"\n            \"Only supported by certain models.\",\n            advanced=True,\n        ),\n    ]\n\n    def build_embeddings(self) -> Embeddings:\n        return OpenAIEmbeddings(\n            client=self.client or None,\n            model=self.model,\n            dimensions=self.dimensions or None,\n            deployment=self.deployment or None,\n            api_version=self.openai_api_version or None,\n            base_url=self.openai_api_base or None,\n            openai_api_type=self.openai_api_type or None,\n            openai_proxy=self.openai_proxy or None,\n            embedding_ctx_length=self.embedding_ctx_length,\n            api_key=self.openai_api_key or None,\n            organization=self.openai_organization or None,\n            allowed_special=\"all\",\n            disallowed_special=\"all\",\n            chunk_size=self.chunk_size,\n            max_retries=self.max_retries,\n            timeout=self.request_timeout or None,\n            tiktoken_enabled=self.tiktoken_enable,\n            tiktoken_model_name=self.tiktoken_model_name or None,\n            show_progress_bar=self.show_progress_bar,\n            model_kwargs=self.model_kwargs,\n            skip_empty=self.skip_empty,\n            default_headers=self.default_headers or None,\n            default_query=self.default_query or None,\n        )\n"
+              },
+              "default_headers": {
+                "_input_type": "DictInput",
+                "advanced": true,
+                "display_name": "Default Headers",
+                "dynamic": false,
+                "info": "Default headers to use for the API request.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "default_headers",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "type": "dict",
+                "value": {}
+              },
+              "default_query": {
+                "_input_type": "DictInput",
+                "advanced": true,
+                "display_name": "Default Query",
+                "dynamic": false,
+                "info": "Default query parameters to use for the API request.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "default_query",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "type": "dict",
+                "value": {}
+              },
+              "deployment": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "Deployment",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "deployment",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "dimensions": {
+                "_input_type": "IntInput",
+                "advanced": true,
+                "display_name": "Dimensions",
+                "dynamic": false,
+                "info": "The number of dimensions the resulting output embeddings should have. Only supported by certain models.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "dimensions",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": ""
+              },
+              "embedding_ctx_length": {
+                "_input_type": "IntInput",
+                "advanced": true,
+                "display_name": "Embedding Context Length",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "embedding_ctx_length",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": 1536
+              },
+              "max_retries": {
+                "_input_type": "IntInput",
+                "advanced": true,
+                "display_name": "Max Retries",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "max_retries",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": 3
+              },
+              "model": {
+                "_input_type": "DropdownInput",
+                "advanced": false,
+                "combobox": false,
+                "dialog_inputs": {},
+                "display_name": "Model",
+                "dynamic": false,
+                "info": "",
+                "name": "model",
+                "options": [
+                  "text-embedding-3-small",
+                  "text-embedding-3-large",
+                  "text-embedding-ada-002"
+                ],
+                "options_metadata": [],
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "toggle": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "text-embedding-3-small"
+              },
+              "model_kwargs": {
+                "_input_type": "DictInput",
+                "advanced": true,
+                "display_name": "Model Kwargs",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "model_kwargs",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "type": "dict",
+                "value": {}
+              },
+              "openai_api_base": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "OpenAI API Base",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "openai_api_base",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "openai_api_key": {
+                "_input_type": "SecretStrInput",
+                "advanced": false,
+                "display_name": "OpenAI API Key",
+                "dynamic": false,
+                "info": "",
+                "input_types": [],
+                "load_from_db": false,
+                "name": "openai_api_key",
+                "password": true,
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "type": "str",
+                "value": ""
+              },
+              "openai_api_type": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "OpenAI API Type",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "openai_api_type",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "openai_api_version": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "OpenAI API Version",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "openai_api_version",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "openai_organization": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "OpenAI Organization",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "openai_organization",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "openai_proxy": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "OpenAI Proxy",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "openai_proxy",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "request_timeout": {
+                "_input_type": "FloatInput",
+                "advanced": true,
+                "display_name": "Request Timeout",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "request_timeout",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "float",
+                "value": ""
+              },
+              "show_progress_bar": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Show Progress Bar",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "show_progress_bar",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": false
+              },
+              "skip_empty": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Skip Empty",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "skip_empty",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": false
+              },
+              "tiktoken_enable": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "TikToken Enable",
+                "dynamic": false,
+                "info": "If False, you must have transformers installed.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "tiktoken_enable",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": true
+              },
+              "tiktoken_model_name": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "TikToken Model Name",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "tiktoken_model_name",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              }
+            },
+            "tool_mode": false
+          },
+          "selected_output": "embeddings",
+          "type": "OpenAIEmbeddings"
+        },
+        "dragging": false,
+        "height": 320,
+        "id": "OpenAIEmbeddings-mP45L",
+        "measured": {
+          "height": 320,
+          "width": 320
+        },
+        "position": {
+          "x": 1704.8491676318172,
+          "y": 1879.144249471858
+        },
+        "positionAbsolute": {
+          "x": 1690.9220896443658,
+          "y": 1866.483269483266
+        },
+        "selected": false,
+        "type": "genericNode",
+        "width": 320
+      },
+      {
+        "data": {
+          "id": "note-59mzY",
+          "node": {
+            "description": "### 💡 Add your OpenAI API key here 👇",
+            "display_name": "",
+            "documentation": "",
+            "template": {
+              "backgroundColor": "transparent"
+            }
+          },
+          "type": "note"
+        },
+        "dragging": false,
+        "height": 324,
+        "id": "note-59mzY",
+        "measured": {
+          "height": 324,
+          "width": 324
+        },
+        "position": {
+          "x": 1692.2322233423606,
+          "y": 1821.9077961087607
+        },
+        "positionAbsolute": {
+          "x": 1692.2322233423606,
+          "y": 1821.9077961087607
+        },
+        "selected": false,
+        "type": "noteNode",
+        "width": 324
+      },
+      {
+        "data": {
+          "id": "OpenSearchHybrid-XtKoA",
+          "node": {
+            "base_classes": [
+              "Data",
+              "DataFrame",
+              "VectorStore"
+            ],
+            "beta": false,
+            "conditional_paths": [],
+            "custom_fields": {},
+            "description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.",
+            "display_name": "OpenSearch (Hybrid)",
+            "documentation": "",
+            "edited": true,
+            "field_order": [
+              "docs_metadata",
+              "opensearch_url",
+              "index_name",
+              "engine",
+              "space_type",
+              "ef_construction",
+              "m",
+              "ingest_data",
+              "search_query",
+              "should_cache_vector_store",
+              "embedding",
+              "vector_field",
+              "number_of_results",
+              "filter_expression",
+              "auth_mode",
+              "username",
+              "password",
+              "jwt_token",
+              "jwt_header",
+              "bearer_prefix",
+              "use_ssl",
+              "verify_certs"
+            ],
+            "frozen": false,
+            "icon": "OpenSearch",
+            "legacy": false,
+            "metadata": {
+              "code_hash": "deee3f04cb47",
+              "dependencies": {
+                "dependencies": [
+                  {
+                    "name": "langflow",
+                    "version": "1.5.0.post2"
+                  },
+                  {
+                    "name": "opensearchpy",
+                    "version": "2.8.0"
+                  }
+                ],
+                "total_dependencies": 2
+              },
+              "module": "custom_components.opensearch_hybrid"
+            },
+            "minimized": false,
+            "output_types": [],
+            "outputs": [
+              {
+                "allows_loop": false,
+                "cache": true,
+                "display_name": "Search Results",
+                "group_outputs": false,
+                "hidden": null,
+                "method": "search_documents",
+                "name": "search_results",
+                "options": null,
+                "required_inputs": null,
+                "selected": "Data",
+                "tool_mode": true,
+                "types": [
+                  "Data"
+                ],
+                "value": "__UNDEFINED__"
+              },
+              {
+                "allows_loop": false,
+                "cache": true,
+                "display_name": "DataFrame",
+                "group_outputs": false,
+                "hidden": null,
+                "method": "as_dataframe",
+                "name": "dataframe",
+                "options": null,
+                "required_inputs": null,
+                "selected": "DataFrame",
+                "tool_mode": true,
+                "types": [
+                  "DataFrame"
+                ],
+                "value": "__UNDEFINED__"
+              },
+              {
+                "allows_loop": false,
+                "cache": true,
+                "display_name": "Vector Store Connection",
+                "group_outputs": false,
+                "hidden": true,
+                "method": "as_vector_store",
+                "name": "vectorstoreconnection",
+                "options": null,
+                "required_inputs": null,
+                "selected": "VectorStore",
+                "tool_mode": true,
+                "types": [
+                  "VectorStore"
+                ],
+                "value": "__UNDEFINED__"
+              }
+            ],
+            "pinned": false,
+            "template": {
+              "_type": "Component",
+              "auth_mode": {
+                "_input_type": "DropdownInput",
+                "advanced": false,
+                "combobox": false,
+                "dialog_inputs": {},
+                "display_name": "Auth Mode",
+                "dynamic": false,
+                "info": "Choose Basic (username/password) or JWT (Bearer token).",
+                "load_from_db": false,
+                "name": "auth_mode",
+                "options": [
+                  "basic",
+                  "jwt"
+                ],
+                "options_metadata": [],
+                "placeholder": "",
+                "real_time_refresh": true,
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "toggle": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "jwt"
+              },
+              "bearer_prefix": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Prefix 'Bearer '",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "bearer_prefix",
+                "placeholder": "",
+                "required": false,
+                "show": false,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": true
+              },
+              "code": {
+                "advanced": true,
+                "dynamic": true,
+                "fileTypes": [],
+                "file_path": "",
+                "info": "",
+                "list": false,
+                "load_from_db": false,
+                "multiline": true,
+                "name": "code",
+                "password": false,
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "type": "code",
+                "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any, Dict, List, Optional\n\nfrom langflow.base.vectorstores.model import (\n    LCVectorStoreComponent,\n    check_cached_vector_store,\n)\nfrom langflow.base.vectorstores.vector_store_connection_decorator import (\n    vector_store_connection,\n)\nfrom langflow.io import (\n    BoolInput,\n    DropdownInput,\n    HandleInput,\n    IntInput,\n    MultilineInput,\n    SecretStrInput,\n    StrInput,\n    TableInput,\n)\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom opensearchpy import OpenSearch, helpers\n\n\n@vector_store_connection\nclass OpenSearchHybridComponent(LCVectorStoreComponent):\n    \"\"\"OpenSearch hybrid search: KNN (k=10, boost=0.7) + multi_match (boost=0.3) with optional filters & min_score.\"\"\"\n\n    display_name: str = \"OpenSearch (Hybrid)\"\n    name: str = \"OpenSearchHybrid\"\n    icon: str = \"OpenSearch\"\n    description: str = \"Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.\"\n\n    # Keys we consider baseline\n    default_keys: list[str] = [\n        \"opensearch_url\",\n        \"index_name\",\n        *[\n            i.name for i in LCVectorStoreComponent.inputs\n        ],  # search_query, add_documents, etc.\n        \"embedding\",\n        \"vector_field\",\n        \"number_of_results\",\n        \"auth_mode\",\n        \"username\",\n        \"password\",\n        \"jwt_token\",\n        \"jwt_header\",\n        \"bearer_prefix\",\n        \"use_ssl\",\n        \"verify_certs\",\n        \"filter_expression\",\n        \"engine\",\n        \"space_type\",\n        \"ef_construction\",\n        \"m\",\n        \"docs_metadata\",\n    ]\n\n    inputs = [\n        TableInput(\n            name=\"docs_metadata\",\n            display_name=\"Ingestion Metadata\",\n            info=\"Key value pairs to be inserted into each ingested document.\",\n           
 table_schema=[\n                {\n                    \"name\": \"key\",\n                    \"display_name\": \"Key\",\n                    \"type\": \"str\",\n                    \"description\": \"Key name\",\n                },\n                {\n                    \"name\": \"value\",\n                    \"display_name\": \"Value\",\n                    \"type\": \"str\",\n                    \"description\": \"Value of the metadata\",\n                },\n            ],\n            value=[],\n            advanced=True,\n        ),\n        StrInput(\n            name=\"opensearch_url\",\n            display_name=\"OpenSearch URL\",\n            value=\"http://localhost:9200\",\n            info=\"URL for your OpenSearch cluster.\",\n        ),\n        StrInput(\n            name=\"index_name\",\n            display_name=\"Index Name\",\n            value=\"langflow\",\n            info=\"The index to search.\",\n        ),\n        DropdownInput(\n            name=\"engine\",\n            display_name=\"Engine\",\n            options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n            value=\"jvector\",\n            info=\"Vector search engine to use.\",\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"space_type\",\n            display_name=\"Space Type\",\n            options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n            value=\"l2\",\n            info=\"Distance metric for vector similarity.\",\n            advanced=True,\n        ),\n        IntInput(\n            name=\"ef_construction\",\n            display_name=\"EF Construction\",\n            value=512,\n            info=\"Size of the dynamic list used during k-NN graph creation.\",\n            advanced=True,\n        ),\n        IntInput(\n            name=\"m\",\n            display_name=\"M Parameter\",\n            value=16,\n            info=\"Number of bidirectional links created for each new element.\",\n     
       advanced=True,\n        ),\n        *LCVectorStoreComponent.inputs,  # includes search_query, add_documents, etc.\n        HandleInput(\n            name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]\n        ),\n        StrInput(\n            name=\"vector_field\",\n            display_name=\"Vector Field\",\n            value=\"chunk_embedding\",\n            advanced=True,\n            info=\"Vector field used for KNN.\",\n        ),\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Default Size (limit)\",\n            value=10,\n            advanced=True,\n            info=\"Default number of hits when no limit provided in filter_expression.\",\n        ),\n        MultilineInput(\n            name=\"filter_expression\",\n            display_name=\"Filter Expression (JSON)\",\n            value=\"\",\n            info=(\n                \"Optional JSON to control filters/limit/score threshold.\\n\"\n                \"Accepted shapes:\\n\"\n                '1) {\"filter\": [ {\"term\": {\"filename\":\"foo\"}}, {\"terms\":{\"owner\":[\"u1\",\"u2\"]}} ], \"limit\": 10, \"score_threshold\": 1.6 }\\n'\n                '2) Context-style maps: {\"data_sources\":[\"fileA\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"123\"]}\\n'\n                \"Placeholders with __IMPOSSIBLE_VALUE__ are ignored.\"\n            ),\n        ),\n        # ----- Auth controls (dynamic) -----\n        DropdownInput(\n            name=\"auth_mode\",\n            display_name=\"Auth Mode\",\n            value=\"basic\",\n            options=[\"basic\", \"jwt\"],\n            info=\"Choose Basic (username/password) or JWT (Bearer token).\",\n            real_time_refresh=True,\n            advanced=False,\n        ),\n        StrInput(\n            name=\"username\",\n            display_name=\"Username\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n           
 name=\"password\",\n            display_name=\"Password\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"jwt_token\",\n            display_name=\"JWT Token\",\n            value=\"JWT\",\n            load_from_db=True,\n            show=True,\n            info=\"Paste a valid JWT (sent as a header).\",\n        ),\n        StrInput(\n            name=\"jwt_header\",\n            display_name=\"JWT Header Name\",\n            value=\"Authorization\",\n            show=False,\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"bearer_prefix\",\n            display_name=\"Prefix 'Bearer '\",\n            value=True,\n            show=False,\n            advanced=True,\n        ),\n        # ----- TLS -----\n        BoolInput(name=\"use_ssl\", display_name=\"Use SSL\", value=True, advanced=True),\n        BoolInput(\n            name=\"verify_certs\",\n            display_name=\"Verify Certificates\",\n            value=False,\n            advanced=True,\n        ),\n    ]\n\n    # ---------- helper functions for index management ----------\n    def _default_text_mapping(\n        self,\n        dim: int,\n        engine: str = \"jvector\",\n        space_type: str = \"l2\",\n        ef_search: int = 512,\n        ef_construction: int = 100,\n        m: int = 16,\n        vector_field: str = \"vector_field\",\n    ) -> Dict[str, Any]:\n        \"\"\"For Approximate k-NN Search, this is the default mapping to create index.\"\"\"\n        return {\n            \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n            \"mappings\": {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": 
space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                    }\n                }\n            },\n        }\n\n    def _validate_aoss_with_engines(self, is_aoss: bool, engine: str) -> None:\n        \"\"\"Validate AOSS with the engine.\"\"\"\n        if is_aoss and engine != \"nmslib\" and engine != \"faiss\":\n            raise ValueError(\n                \"Amazon OpenSearch Service Serverless only \"\n                \"supports `nmslib` or `faiss` engines\"\n            )\n\n    def _is_aoss_enabled(self, http_auth: Any) -> bool:\n        \"\"\"Check if the service is http_auth is set as `aoss`.\"\"\"\n        if (\n            http_auth is not None\n            and hasattr(http_auth, \"service\")\n            and http_auth.service == \"aoss\"\n        ):\n            return True\n        return False\n\n    def _bulk_ingest_embeddings(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        embeddings: List[List[float]],\n        texts: List[str],\n        metadatas: Optional[List[dict]] = None,\n        ids: Optional[List[str]] = None,\n        vector_field: str = \"vector_field\",\n        text_field: str = \"text\",\n        mapping: Optional[Dict] = None,\n        max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,\n        is_aoss: bool = False,\n    ) -> List[str]:\n        \"\"\"Bulk Ingest Embeddings into given index.\"\"\"\n        if not mapping:\n            mapping = dict()\n\n        requests = []\n        return_ids = []\n\n        for i, text in enumerate(texts):\n            metadata = metadatas[i] if metadatas else {}\n            _id = ids[i] if ids else str(uuid.uuid4())\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": index_name,\n                vector_field: embeddings[i],\n                text_field: text,\n                
**metadata,\n            }\n            if is_aoss:\n                request[\"id\"] = _id\n            else:\n                request[\"_id\"] = _id\n            requests.append(request)\n            return_ids.append(_id)\n        self.log(metadatas[i])\n        helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n        return return_ids\n\n    # ---------- auth / client ----------\n    def _build_auth_kwargs(self) -> Dict[str, Any]:\n        mode = (self.auth_mode or \"basic\").strip().lower()\n        if mode == \"jwt\":\n            token = (self.jwt_token or \"\").strip()\n            if not token:\n                raise ValueError(\"Auth Mode is 'jwt' but no jwt_token was provided.\")\n            header_name = (self.jwt_header or \"Authorization\").strip()\n            header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n            return {\"headers\": {header_name: header_value}}\n        user = (self.username or \"\").strip()\n        pwd = (self.password or \"\").strip()\n        if not user or not pwd:\n            raise ValueError(\"Auth Mode is 'basic' but username/password are missing.\")\n        return {\"http_auth\": (user, pwd)}\n\n    def build_client(self) -> OpenSearch:\n        auth_kwargs = self._build_auth_kwargs()\n        return OpenSearch(\n            hosts=[self.opensearch_url],\n            use_ssl=self.use_ssl,\n            verify_certs=self.verify_certs,\n            ssl_assert_hostname=False,\n            ssl_show_warn=False,\n            **auth_kwargs,\n        )\n\n    @check_cached_vector_store\n    def build_vector_store(self) -> OpenSearch:\n        # Return raw OpenSearch client as our “vector store.”\n        self.log(self.ingest_data)\n        client = self.build_client()\n        self._add_documents_to_vector_store(client=client)\n        return client\n\n    # ---------- ingest ----------\n    def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n        # Convert DataFrame 
to Data if needed using parent's method\n        self.ingest_data = self._prepare_ingest_data()\n\n        docs = self.ingest_data or []\n        if not docs:\n            self.log(\"No documents to ingest.\")\n            return\n\n        # Extract texts and metadata from documents\n        texts = []\n        metadatas = []\n        # Process docs_metadata table input into a dict\n        additional_metadata = {}\n        if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n            for item in self.docs_metadata:\n                if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n                    additional_metadata[item[\"key\"]] = item[\"value\"]\n\n        for doc_obj in docs:\n            data_copy = json.loads(doc_obj.model_dump_json())\n            text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n            texts.append(text)\n\n            # Merge additional metadata from table input\n            data_copy.update(additional_metadata)\n\n            metadatas.append(data_copy)\n        self.log(metadatas)\n        if not self.embedding:\n            raise ValueError(\"Embedding handle is required to embed documents.\")\n\n        # Generate embeddings\n        vectors = self.embedding.embed_documents(texts)\n\n        if not vectors:\n            self.log(\"No vectors generated from documents.\")\n            return\n\n        # Get vector dimension for mapping\n        dim = len(vectors[0]) if vectors else 768  # default fallback\n\n        # Check for AOSS\n        auth_kwargs = self._build_auth_kwargs()\n        is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n        # Validate engine with AOSS\n        engine = getattr(self, \"engine\", \"jvector\")\n        self._validate_aoss_with_engines(is_aoss, engine)\n\n        # Create mapping with proper KNN settings\n        space_type = getattr(self, \"space_type\", \"l2\")\n        ef_construction = getattr(self, \"ef_construction\", 512)\n 
       m = getattr(self, \"m\", 16)\n\n        mapping = self._default_text_mapping(\n            dim=dim,\n            engine=engine,\n            space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n            vector_field=self.vector_field,\n        )\n\n        self.log(\n            f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\"\n        )\n\n        # Use the LangChain-style bulk ingestion\n        return_ids = self._bulk_ingest_embeddings(\n            client=client,\n            index_name=self.index_name,\n            embeddings=vectors,\n            texts=texts,\n            metadatas=metadatas,\n            vector_field=self.vector_field,\n            text_field=\"text\",\n            mapping=mapping,\n            is_aoss=is_aoss,\n        )\n        self.log(metadatas)\n\n        self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n    # ---------- helpers for filters ----------\n    def _is_placeholder_term(self, term_obj: dict) -> bool:\n        # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n        return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n    def _coerce_filter_clauses(self, filter_obj: dict | None) -> List[dict]:\n        \"\"\"\n        Accepts either:\n          A) {\"filter\":[ ...term/terms objects... 
], \"limit\":..., \"score_threshold\":...}\n          B) Context-style: {\"data_sources\":[...], \"document_types\":[...], \"owners\":[...]}\n        Returns a list of OS filter clauses (term/terms), skipping placeholders and empty terms.\n        \"\"\"\n\n        if not filter_obj:\n            return []\n\n        # If it’s a string, try to parse it once\n        if isinstance(filter_obj, str):\n            try:\n                filter_obj = json.loads(filter_obj)\n            except Exception:\n                # Not valid JSON → treat as no filters\n                return []\n\n        # Case A: already an explicit list/dict under \"filter\"\n        if \"filter\" in filter_obj:\n            raw = filter_obj[\"filter\"]\n            if isinstance(raw, dict):\n                raw = [raw]\n            clauses: List[dict] = []\n            for f in raw or []:\n                if (\n                    \"term\" in f\n                    and isinstance(f[\"term\"], dict)\n                    and not self._is_placeholder_term(f[\"term\"])\n                ):\n                    clauses.append(f)\n                elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n                    field, vals = next(iter(f[\"terms\"].items()))\n                    if isinstance(vals, list) and len(vals) > 0:\n                        clauses.append(f)\n            return clauses\n\n        # Case B: convert context-style maps into clauses\n        field_mapping = {\n            \"data_sources\": \"filename\",\n            \"document_types\": \"mimetype\",\n            \"owners\": \"owner\",\n        }\n        clauses: List[dict] = []\n        for k, values in filter_obj.items():\n            if not isinstance(values, list):\n                continue\n            field = field_mapping.get(k, k)\n            if len(values) == 0:\n                # Match-nothing placeholder (kept to mirror your tool semantics)\n                clauses.append({\"term\": {field: 
\"__IMPOSSIBLE_VALUE__\"}})\n            elif len(values) == 1:\n                if values[0] != \"__IMPOSSIBLE_VALUE__\":\n                    clauses.append({\"term\": {field: values[0]}})\n            else:\n                clauses.append({\"terms\": {field: values}})\n        return clauses\n\n    # ---------- search (single hybrid path matching your tool) ----------\n    def search(self, query: str | None = None) -> list[dict[str, Any]]:\n        logger.info(self.ingest_data)\n        client = self.build_client()\n        q = (query or \"\").strip()\n\n        # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n        filter_obj = None\n        if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n            try:\n                filter_obj = json.loads(self.filter_expression)\n            except json.JSONDecodeError as e:\n                raise ValueError(f\"Invalid filter_expression JSON: {e}\") from e\n\n        if not self.embedding:\n            raise ValueError(\n                \"Embedding is required to run hybrid search (KNN + keyword).\"\n            )\n\n        # Embed the query\n        vec = self.embedding.embed_query(q)\n\n        # Build filter clauses (accept both shapes)\n        clauses = self._coerce_filter_clauses(filter_obj)\n\n        # Respect the tool's limit/threshold defaults\n        limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n        score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n        # Build the same hybrid body as your SearchService\n        body = {\n            \"query\": {\n                \"bool\": {\n                    \"should\": [\n                        {\n                            \"knn\": {\n                                self.vector_field: {\n                                    \"vector\": vec,\n                                    \"k\": 10,  # fixed to match the tool\n                             
       \"boost\": 0.7,\n                                }\n                            }\n                        },\n                        {\n                            \"multi_match\": {\n                                \"query\": q,\n                                \"fields\": [\"text^2\", \"filename^1.5\"],\n                                \"type\": \"best_fields\",\n                                \"fuzziness\": \"AUTO\",\n                                \"boost\": 0.3,\n                            }\n                        },\n                    ],\n                    \"minimum_should_match\": 1,\n                }\n            },\n            \"aggs\": {\n                \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n                \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n                \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n            },\n            \"_source\": [\n                \"filename\",\n                \"mimetype\",\n                \"page\",\n                \"text\",\n                \"source_url\",\n                \"owner\",\n                \"allowed_users\",\n                \"allowed_groups\",\n            ],\n            \"size\": limit,\n        }\n        if clauses:\n            body[\"query\"][\"bool\"][\"filter\"] = clauses\n\n        if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n            # top-level min_score (matches your tool)\n            body[\"min_score\"] = score_threshold\n\n        resp = client.search(index=self.index_name, body=body)\n        hits = resp.get(\"hits\", {}).get(\"hits\", [])\n        return [\n            {\n                \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n                \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n                \"score\": hit.get(\"_score\"),\n            }\n            for hit in hits\n        ]\n\n    def 
search_documents(self) -> list[Data]:\n        try:\n            raw = self.search(self.search_query or \"\")\n            return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n            self.log(self.ingest_data)\n        except Exception as e:\n            self.log(f\"search_documents error: {e}\")\n            raise\n\n    # -------- dynamic UI handling (auth switch) --------\n    async def update_build_config(\n        self, build_config: dict, field_value: str, field_name: str | None = None\n    ) -> dict:\n        try:\n            if field_name == \"auth_mode\":\n                mode = (field_value or \"basic\").strip().lower()\n                is_basic = mode == \"basic\"\n                is_jwt = mode == \"jwt\"\n\n                build_config[\"username\"][\"show\"] = is_basic\n                build_config[\"password\"][\"show\"] = is_basic\n\n                build_config[\"jwt_token\"][\"show\"] = is_jwt\n                build_config[\"jwt_header\"][\"show\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n                build_config[\"username\"][\"required\"] = is_basic\n                build_config[\"password\"][\"required\"] = is_basic\n\n                build_config[\"jwt_token\"][\"required\"] = is_jwt\n                build_config[\"jwt_header\"][\"required\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"required\"] = False\n\n                if is_basic:\n                    build_config[\"jwt_token\"][\"value\"] = \"\"\n\n                return build_config\n\n            return build_config\n\n        except Exception as e:\n            self.log(f\"update_build_config error: {e}\")\n            return build_config\n"
+              },
+              "docs_metadata": {
+                "_input_type": "TableInput",
+                "advanced": true,
+                "display_name": "Ingestion Metadata",
+                "dynamic": false,
+                "info": "Key-value pairs to be inserted into each ingested document.",
+                "is_list": true,
+                "list_add_label": "Add More",
+                "name": "docs_metadata",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "table_icon": "Table",
+                "table_schema": {
+                  "columns": [
+                    {
+                      "default": "None",
+                      "description": "Key name",
+                      "disable_edit": false,
+                      "display_name": "Key",
+                      "edit_mode": "popover",
+                      "filterable": true,
+                      "formatter": "text",
+                      "hidden": false,
+                      "name": "key",
+                      "sortable": true,
+                      "type": "str"
+                    },
+                    {
+                      "default": "None",
+                      "description": "Value of the metadata",
+                      "disable_edit": false,
+                      "display_name": "Value",
+                      "edit_mode": "popover",
+                      "filterable": true,
+                      "formatter": "text",
+                      "hidden": false,
+                      "name": "value",
+                      "sortable": true,
+                      "type": "str"
+                    }
+                  ]
+                },
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "trigger_icon": "Table",
+                "trigger_text": "Open table",
+                "type": "table",
+                "value": []
+              },
+              "ef_construction": {
+                "_input_type": "IntInput",
+                "advanced": true,
+                "display_name": "EF Construction",
+                "dynamic": false,
+                "info": "Size of the dynamic list used during k-NN graph creation.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "ef_construction",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": 512
+              },
+              "embedding": {
+                "_input_type": "HandleInput",
+                "advanced": false,
+                "display_name": "Embedding",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Embeddings"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "embedding",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "trace_as_metadata": true,
+                "type": "other",
+                "value": ""
+              },
+              "engine": {
+                "_input_type": "DropdownInput",
+                "advanced": true,
+                "combobox": false,
+                "dialog_inputs": {},
+                "display_name": "Engine",
+                "dynamic": false,
+                "info": "Vector search engine to use.",
+                "load_from_db": false,
+                "name": "engine",
+                "options": [
+                  "jvector",
+                  "nmslib",
+                  "faiss",
+                  "lucene"
+                ],
+                "options_metadata": [],
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "toggle": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "nmslib"
+              },
+              "filter_expression": {
+                "_input_type": "MultilineInput",
+                "advanced": false,
+                "copy_field": false,
+                "display_name": "Filter Expression (JSON)",
+                "dynamic": false,
+                "info": "Optional JSON to control filters/limit/score threshold.\nAccepted shapes:\n1) {\"filter\": [ {\"term\": {\"filename\":\"foo\"}}, {\"terms\":{\"owner\":[\"u1\",\"u2\"]}} ], \"limit\": 10, \"score_threshold\": 1.6 }\n2) Context-style maps: {\"data_sources\":[\"fileA\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"123\"]}\nPlaceholders with __IMPOSSIBLE_VALUE__ are ignored.",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "multiline": true,
+                "name": "filter_expression",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              },
+              "index_name": {
+                "_input_type": "StrInput",
+                "advanced": false,
+                "display_name": "Index Name",
+                "dynamic": false,
+                "info": "The index to search.",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "index_name",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "documents"
+              },
+              "ingest_data": {
+                "_input_type": "HandleInput",
+                "advanced": false,
+                "display_name": "Ingest Data",
+                "dynamic": false,
+                "info": "",
+                "input_types": [
+                  "Data",
+                  "DataFrame"
+                ],
+                "list": true,
+                "list_add_label": "Add More",
+                "name": "ingest_data",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "trace_as_metadata": true,
+                "type": "other",
+                "value": ""
+              },
+              "jwt_header": {
+                "_input_type": "StrInput",
+                "advanced": true,
+                "display_name": "JWT Header Name",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "jwt_header",
+                "placeholder": "",
+                "required": false,
+                "show": false,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "Authorization"
+              },
+              "jwt_token": {
+                "_input_type": "SecretStrInput",
+                "advanced": false,
+                "display_name": "JWT Token",
+                "dynamic": false,
+                "info": "Paste a valid JWT (sent as a header).",
+                "input_types": [],
+                "load_from_db": false,
+                "name": "jwt_token",
+                "password": true,
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "type": "str",
+                "value": ""
+              },
+              "m": {
+                "_input_type": "IntInput",
+                "advanced": true,
+                "display_name": "M Parameter",
+                "dynamic": false,
+                "info": "Number of bidirectional links created for each new element.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "m",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": 16
+              },
+              "number_of_results": {
+                "_input_type": "IntInput",
+                "advanced": true,
+                "display_name": "Default Size (limit)",
+                "dynamic": false,
+                "info": "Default number of hits when no limit is provided in filter_expression.",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "number_of_results",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": 15
+              },
+              "opensearch_url": {
+                "_input_type": "StrInput",
+                "advanced": false,
+                "display_name": "OpenSearch URL",
+                "dynamic": false,
+                "info": "URL for your OpenSearch cluster.",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "opensearch_url",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "https://opensearch:9200"
+              },
+              "password": {
+                "_input_type": "SecretStrInput",
+                "advanced": false,
+                "display_name": "Password",
+                "dynamic": false,
+                "info": "",
+                "input_types": [],
+                "load_from_db": false,
+                "name": "password",
+                "password": true,
+                "placeholder": "",
+                "required": false,
+                "show": false,
+                "title_case": false,
+                "type": "str",
+                "value": ""
+              },
+              "search_query": {
+                "_input_type": "QueryInput",
+                "advanced": false,
+                "display_name": "Search Query",
+                "dynamic": false,
+                "info": "Enter a query to run a similarity search.",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "search_query",
+                "placeholder": "Enter a query...",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": true,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "query",
+                "value": ""
+              },
+              "should_cache_vector_store": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Cache Vector Store",
+                "dynamic": false,
+                "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "should_cache_vector_store",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": true
+              },
+              "space_type": {
+                "_input_type": "DropdownInput",
+                "advanced": true,
+                "combobox": false,
+                "dialog_inputs": {},
+                "display_name": "Space Type",
+                "dynamic": false,
+                "info": "Distance metric for vector similarity.",
+                "name": "space_type",
+                "options": [
+                  "l2",
+                  "l1",
+                  "cosinesimil",
+                  "linf",
+                  "innerproduct"
+                ],
+                "options_metadata": [],
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "toggle": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "l2"
+              },
+              "use_ssl": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Use SSL",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "use_ssl",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": true
+              },
+              "username": {
+                "_input_type": "StrInput",
+                "advanced": false,
+                "display_name": "Username",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "username",
+                "placeholder": "",
+                "required": false,
+                "show": false,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "admin"
+              },
+              "vector_field": {
+                "_input_type": "StrInput",
+                "advanced": true,
+                "display_name": "Vector Field",
+                "dynamic": false,
+                "info": "Vector field used for KNN.",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "vector_field",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "chunk_embedding"
+              },
+              "verify_certs": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Verify Certificates",
+                "dynamic": false,
+                "info": "",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "verify_certs",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": false
+              }
+            },
+            "tool_mode": false
+          },
+          "selected_output": "search_results",
+          "showNode": true,
+          "type": "OpenSearchHybrid"
+        },
+        "dragging": false,
+        "id": "OpenSearchHybrid-XtKoA",
+        "measured": {
+          "height": 765,
+          "width": 320
+        },
+        "position": {
+          "x": 2218.9287723423276,
+          "y": 1332.2598463956504
+        },
+        "selected": false,
+        "type": "genericNode"
+      },
+      {
+        "data": {
+          "id": "DoclingRemote-78KoX",
+          "node": {
+            "base_classes": [
+              "DataFrame"
+            ],
+            "beta": false,
+            "conditional_paths": [],
+            "custom_fields": {},
+            "description": "Uses Docling to process input documents by connecting to your instance of Docling Serve.",
+            "display_name": "Docling Serve",
+            "documentation": "https://docling-project.github.io/docling/",
+            "edited": false,
+            "field_order": [
+              "path",
+              "file_path",
+              "separator",
+              "silent_errors",
+              "delete_server_file_after_processing",
+              "ignore_unsupported_extensions",
+              "ignore_unspecified_files",
+              "api_url",
+              "max_concurrency",
+              "max_poll_timeout",
+              "api_headers",
+              "docling_serve_opts"
+            ],
+            "frozen": false,
+            "icon": "Docling",
+            "legacy": false,
+            "lf_version": "1.6.0",
+            "metadata": {
+              "code_hash": "930312ffe40c",
+              "dependencies": {
+                "dependencies": [
+                  {
+                    "name": "httpx",
+                    "version": "0.28.1"
+                  },
+                  {
+                    "name": "docling_core",
+                    "version": "2.45.0"
+                  },
+                  {
+                    "name": "pydantic",
+                    "version": "2.10.6"
+                  },
+                  {
+                    "name": "lfx",
+                    "version": null
+                  }
+                ],
+                "total_dependencies": 4
+              },
+              "module": "lfx.components.docling.docling_remote.DoclingRemoteComponent"
+            },
+            "minimized": false,
+            "output_types": [],
+            "outputs": [
+              {
+                "allows_loop": false,
+                "cache": true,
+                "display_name": "Files",
+                "group_outputs": false,
+                "method": "load_files",
+                "name": "dataframe",
+                "selected": "DataFrame",
+                "tool_mode": true,
+                "types": [
+                  "DataFrame"
+                ],
+                "value": "__UNDEFINED__"
+              }
+            ],
+            "pinned": false,
+            "template": {
+              "_type": "Component",
+              "api_headers": {
+                "_input_type": "NestedDictInput",
+                "advanced": true,
+                "display_name": "HTTP headers",
+                "dynamic": false,
+                "info": "Optional dictionary of additional headers required for connecting to Docling Serve.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "api_headers",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "NestedDict",
+                "value": {}
+              },
+              "api_url": {
+                "_input_type": "StrInput",
+                "advanced": false,
+                "display_name": "Server address",
+                "dynamic": false,
+                "info": "URL of the Docling Serve instance.",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "api_url",
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "http://localhost:5001"
+              },
+              "code": {
+                "advanced": true,
+                "dynamic": true,
+                "fileTypes": [],
+                "file_path": "",
+                "info": "",
+                "list": false,
+                "load_from_db": false,
+                "multiline": true,
+                "name": "code",
+                "password": false,
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "type": "code",
+                "value": "import base64\nimport time\nfrom concurrent.futures import Future, ThreadPoolExecutor\nfrom pathlib import Path\nfrom typing import Any\n\nimport httpx\nfrom docling_core.types.doc import DoclingDocument\nfrom pydantic import ValidationError\n\nfrom lfx.base.data import BaseFileComponent\nfrom lfx.inputs import IntInput, NestedDictInput, StrInput\nfrom lfx.inputs.inputs import FloatInput\nfrom lfx.schema import Data\n\n\nclass DoclingRemoteComponent(BaseFileComponent):\n    display_name = \"Docling Serve\"\n    description = \"Uses Docling to process input documents connecting to your instance of Docling Serve.\"\n    documentation = \"https://docling-project.github.io/docling/\"\n    trace_type = \"tool\"\n    icon = \"Docling\"\n    name = \"DoclingRemote\"\n\n    MAX_500_RETRIES = 5\n\n    # https://docling-project.github.io/docling/usage/supported_formats/\n    VALID_EXTENSIONS = [\n        \"adoc\",\n        \"asciidoc\",\n        \"asc\",\n        \"bmp\",\n        \"csv\",\n        \"dotx\",\n        \"dotm\",\n        \"docm\",\n        \"docx\",\n        \"htm\",\n        \"html\",\n        \"jpeg\",\n        \"json\",\n        \"md\",\n        \"pdf\",\n        \"png\",\n        \"potx\",\n        \"ppsx\",\n        \"pptm\",\n        \"potm\",\n        \"ppsm\",\n        \"pptx\",\n        \"tiff\",\n        \"txt\",\n        \"xls\",\n        \"xlsx\",\n        \"xhtml\",\n        \"xml\",\n        \"webp\",\n    ]\n\n    inputs = [\n        *BaseFileComponent.get_base_inputs(),\n        StrInput(\n            name=\"api_url\",\n            display_name=\"Server address\",\n            info=\"URL of the Docling Serve instance.\",\n            required=True,\n        ),\n        IntInput(\n            name=\"max_concurrency\",\n            display_name=\"Concurrency\",\n            info=\"Maximum number of concurrent requests for the server.\",\n            advanced=True,\n            value=2,\n        ),\n        FloatInput(\n 
           name=\"max_poll_timeout\",\n            display_name=\"Maximum poll time\",\n            info=\"Maximum waiting time for the document conversion to complete.\",\n            advanced=True,\n            value=3600,\n        ),\n        NestedDictInput(\n            name=\"api_headers\",\n            display_name=\"HTTP headers\",\n            advanced=True,\n            required=False,\n            info=(\"Optional dictionary of additional headers required for connecting to Docling Serve.\"),\n        ),\n        NestedDictInput(\n            name=\"docling_serve_opts\",\n            display_name=\"Docling options\",\n            advanced=True,\n            required=False,\n            info=(\n                \"Optional dictionary of additional options. \"\n                \"See https://github.com/docling-project/docling-serve/blob/main/docs/usage.md for more information.\"\n            ),\n        ),\n    ]\n\n    outputs = [\n        *BaseFileComponent.get_base_outputs(),\n    ]\n\n    def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:\n        base_url = f\"{self.api_url}/v1\"\n\n        def _convert_document(client: httpx.Client, file_path: Path, options: dict[str, Any]) -> Data | None:\n            encoded_doc = base64.b64encode(file_path.read_bytes()).decode()\n            payload = {\n                \"options\": options,\n                \"sources\": [{\"kind\": \"file\", \"base64_string\": encoded_doc, \"filename\": file_path.name}],\n            }\n\n            response = client.post(f\"{base_url}/convert/source/async\", json=payload)\n            response.raise_for_status()\n            task = response.json()\n\n            http_failures = 0\n            retry_status_start = 500\n            retry_status_end = 600\n            start_wait_time = time.monotonic()\n            while task[\"task_status\"] not in (\"success\", \"failure\"):\n                # Check if processing exceeds the 
maximum poll timeout\n                processing_time = time.monotonic() - start_wait_time\n                if processing_time >= self.max_poll_timeout:\n                    msg = (\n                        f\"Processing time {processing_time=} exceeds the maximum poll timeout {self.max_poll_timeout=}.\"\n                        \"Please increase the max_poll_timeout parameter or review why the processing \"\n                        \"takes long on the server.\"\n                    )\n                    self.log(msg)\n                    raise RuntimeError(msg)\n\n                # Call for a new status update\n                time.sleep(2)\n                response = client.get(f\"{base_url}/status/poll/{task['task_id']}\")\n\n                # Check if the status call gets into 5xx errors and retry\n                if retry_status_start <= response.status_code < retry_status_end:\n                    http_failures += 1\n                    if http_failures > self.MAX_500_RETRIES:\n                        self.log(f\"The status requests got a http response {response.status_code} too many times.\")\n                        return None\n                    continue\n\n                # Update task status\n                task = response.json()\n\n            result_resp = client.get(f\"{base_url}/result/{task['task_id']}\")\n            result_resp.raise_for_status()\n            result = result_resp.json()\n\n            if \"json_content\" not in result[\"document\"] or result[\"document\"][\"json_content\"] is None:\n                self.log(\"No JSON DoclingDocument found in the result.\")\n                return None\n\n            try:\n                doc = DoclingDocument.model_validate(result[\"document\"][\"json_content\"])\n                return Data(data={\"doc\": doc, \"file_path\": str(file_path)})\n            except ValidationError as e:\n                self.log(f\"Error validating the document. 
{e}\")\n                return None\n\n        docling_options = {\n            \"to_formats\": [\"json\"],\n            \"image_export_mode\": \"placeholder\",\n            **(self.docling_serve_opts or {}),\n        }\n\n        processed_data: list[Data | None] = []\n        with (\n            httpx.Client(headers=self.api_headers) as client,\n            ThreadPoolExecutor(max_workers=self.max_concurrency) as executor,\n        ):\n            futures: list[tuple[int, Future]] = []\n            for i, file in enumerate(file_list):\n                if file.path is None:\n                    processed_data.append(None)\n                    continue\n\n                futures.append((i, executor.submit(_convert_document, client, file.path, docling_options)))\n\n            for _index, future in futures:\n                try:\n                    result_data = future.result()\n                    processed_data.append(result_data)\n                except (httpx.HTTPStatusError, httpx.RequestError, KeyError, ValueError) as exc:\n                    self.log(f\"Docling remote processing failed: {exc}\")\n                    raise\n\n        return self.rollup_data(file_list, processed_data)\n"
+              },
+              "delete_server_file_after_processing": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Delete Server File After Processing",
+                "dynamic": false,
+                "info": "If true, the Server File Path will be deleted after processing.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "delete_server_file_after_processing",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": true
+              },
+              "docling_serve_opts": {
+                "_input_type": "NestedDictInput",
+                "advanced": false,
+                "display_name": "Docling options",
+                "dynamic": false,
+                "info": "Optional dictionary of additional options. See https://github.com/docling-project/docling-serve/blob/main/docs/usage.md for more information.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "docling_serve_opts",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "NestedDict",
+                "value": {
+                  "do_ocr": false
+                }
+              },
+              "file_path": {
+                "_input_type": "HandleInput",
+                "advanced": true,
+                "display_name": "Server File Path",
+                "dynamic": false,
+                "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supersedes 'Path' but supports same file types.",
+                "input_types": [
+                  "Data",
+                  "Message"
+                ],
+                "list": true,
+                "list_add_label": "Add More",
+                "name": "file_path",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "trace_as_metadata": true,
+                "type": "other",
+                "value": ""
+              },
+              "ignore_unspecified_files": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Ignore Unspecified Files",
+                "dynamic": false,
+                "info": "If true, Data with no 'file_path' property will be ignored.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "ignore_unspecified_files",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": false
+              },
+              "ignore_unsupported_extensions": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Ignore Unsupported Extensions",
+                "dynamic": false,
+                "info": "If true, files with unsupported extensions will not be processed.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "ignore_unsupported_extensions",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": true
+              },
+              "max_concurrency": {
+                "_input_type": "IntInput",
+                "advanced": false,
+                "display_name": "Concurrency",
+                "dynamic": false,
+                "info": "Maximum number of concurrent requests for the server.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "max_concurrency",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "int",
+                "value": 2
+              },
+              "max_poll_timeout": {
+                "_input_type": "FloatInput",
+                "advanced": true,
+                "display_name": "Maximum poll time",
+                "dynamic": false,
+                "info": "Maximum waiting time for the document conversion to complete.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "max_poll_timeout",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "float",
+                "value": 3600
+              },
+              "path": {
+                "_input_type": "FileInput",
+                "advanced": false,
+                "display_name": "Files",
+                "dynamic": false,
+                "fileTypes": [
+                  "adoc",
+                  "asciidoc",
+                  "asc",
+                  "bmp",
+                  "csv",
+                  "dotx",
+                  "dotm",
+                  "docm",
+                  "docx",
+                  "htm",
+                  "html",
+                  "jpeg",
+                  "json",
+                  "md",
+                  "pdf",
+                  "png",
+                  "potx",
+                  "ppsx",
+                  "pptm",
+                  "potm",
+                  "ppsm",
+                  "pptx",
+                  "tiff",
+                  "txt",
+                  "xls",
+                  "xlsx",
+                  "xhtml",
+                  "xml",
+                  "webp",
+                  "zip",
+                  "tar",
+                  "tgz",
+                  "bz2",
+                  "gz"
+                ],
+                "file_path": [],
+                "info": "Supported file extensions: adoc, asciidoc, asc, bmp, csv, dotx, dotm, docm, docx, htm, html, jpeg, json, md, pdf, png, potx, ppsx, pptm, potm, ppsm, pptx, tiff, txt, xls, xlsx, xhtml, xml, webp; optionally bundled in file extensions: zip, tar, tgz, bz2, gz",
+                "list": true,
+                "list_add_label": "Add More",
+                "name": "path",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "temp_file": false,
+                "title_case": false,
+                "trace_as_metadata": true,
+                "type": "file",
+                "value": ""
+              },
+              "separator": {
+                "_input_type": "StrInput",
+                "advanced": true,
+                "display_name": "Separator",
+                "dynamic": false,
+                "info": "Specify the separator to use between multiple outputs in Message format.",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "separator",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "\n\n"
+              },
+              "silent_errors": {
+                "_input_type": "BoolInput",
+                "advanced": true,
+                "display_name": "Silent Errors",
+                "dynamic": false,
+                "info": "If true, errors will not raise an exception.",
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "silent_errors",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "bool",
+                "value": false
+              }
+            },
+            "tool_mode": false
+          },
+          "showNode": true,
+          "type": "DoclingRemote"
+        },
+        "dragging": false,
+        "id": "DoclingRemote-78KoX",
+        "measured": {
+          "height": 475,
+          "width": 320
+        },
+        "position": {
+          "x": 974.2998232996713,
+          "y": 1337.9345348080217
+        },
+        "selected": true,
+        "type": "genericNode"
+      },
+      {
+        "data": {
+          "id": "ExportDoclingDocument-xFoCI",
+          "node": {
+            "base_classes": [
+              "Data",
+              "DataFrame"
+            ],
+            "beta": false,
+            "conditional_paths": [],
+            "custom_fields": {},
+            "description": "Export DoclingDocument to markdown, html or other formats.",
+            "display_name": "Export DoclingDocument",
+            "documentation": "https://docling-project.github.io/docling/",
+            "edited": false,
+            "field_order": [
+              "data_inputs",
+              "export_format",
+              "image_mode",
+              "md_image_placeholder",
+              "md_page_break_placeholder",
+              "doc_key"
+            ],
+            "frozen": false,
+            "icon": "Docling",
+            "legacy": false,
+            "lf_version": "1.6.0",
+            "metadata": {
+              "code_hash": "4de16ddd37ac",
+              "dependencies": {
+                "dependencies": [
+                  {
+                    "name": "docling_core",
+                    "version": "2.45.0"
+                  },
+                  {
+                    "name": "lfx",
+                    "version": null
+                  }
+                ],
+                "total_dependencies": 2
+              },
+              "module": "lfx.components.docling.export_docling_document.ExportDoclingDocumentComponent"
+            },
+            "minimized": false,
+            "output_types": [],
+            "outputs": [
+              {
+                "allows_loop": false,
+                "cache": true,
+                "display_name": "Exported data",
+                "group_outputs": false,
+                "method": "export_document",
+                "name": "data",
+                "selected": "Data",
+                "tool_mode": true,
+                "types": [
+                  "Data"
+                ],
+                "value": "__UNDEFINED__"
+              },
+              {
+                "allows_loop": false,
+                "cache": true,
+                "display_name": "DataFrame",
+                "group_outputs": false,
+                "method": "as_dataframe",
+                "name": "dataframe",
+                "tool_mode": true,
+                "types": [
+                  "DataFrame"
+                ],
+                "value": "__UNDEFINED__"
+              }
+            ],
+            "pinned": false,
+            "template": {
+              "_type": "Component",
+              "code": {
+                "advanced": true,
+                "dynamic": true,
+                "fileTypes": [],
+                "file_path": "",
+                "info": "",
+                "list": false,
+                "load_from_db": false,
+                "multiline": true,
+                "name": "code",
+                "password": false,
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "type": "code",
+                "value": "from typing import Any\n\nfrom docling_core.types.doc import ImageRefMode\n\nfrom lfx.base.data.docling_utils import extract_docling_documents\nfrom lfx.custom import Component\nfrom lfx.io import DropdownInput, HandleInput, MessageTextInput, Output, StrInput\nfrom lfx.schema import Data, DataFrame\n\n\nclass ExportDoclingDocumentComponent(Component):\n    display_name: str = \"Export DoclingDocument\"\n    description: str = \"Export DoclingDocument to markdown, html or other formats.\"\n    documentation = \"https://docling-project.github.io/docling/\"\n    icon = \"Docling\"\n    name = \"ExportDoclingDocument\"\n\n    inputs = [\n        HandleInput(\n            name=\"data_inputs\",\n            display_name=\"Data or DataFrame\",\n            info=\"The data with documents to export.\",\n            input_types=[\"Data\", \"DataFrame\"],\n            required=True,\n        ),\n        DropdownInput(\n            name=\"export_format\",\n            display_name=\"Export format\",\n            options=[\"Markdown\", \"HTML\", \"Plaintext\", \"DocTags\"],\n            info=\"Select the export format to convert the input.\",\n            value=\"Markdown\",\n            real_time_refresh=True,\n        ),\n        DropdownInput(\n            name=\"image_mode\",\n            display_name=\"Image export mode\",\n            options=[\"placeholder\", \"embedded\"],\n            info=(\n                \"Specify how images are exported in the output. 
Placeholder will replace the images with a string, \"\n                \"whereas Embedded will include them as base64 encoded images.\"\n            ),\n            value=\"placeholder\",\n        ),\n        StrInput(\n            name=\"md_image_placeholder\",\n            display_name=\"Image placeholder\",\n            info=\"Specify the image placeholder for markdown exports.\",\n            value=\"<!-- image -->\",\n            advanced=True,\n        ),\n        StrInput(\n            name=\"md_page_break_placeholder\",\n            display_name=\"Page break placeholder\",\n            info=\"Add this placeholder betweek pages in the markdown output.\",\n            value=\"\",\n            advanced=True,\n        ),\n        MessageTextInput(\n            name=\"doc_key\",\n            display_name=\"Doc Key\",\n            info=\"The key to use for the DoclingDocument column.\",\n            value=\"doc\",\n            advanced=True,\n        ),\n    ]\n\n    outputs = [\n        Output(display_name=\"Exported data\", name=\"data\", method=\"export_document\"),\n        Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"as_dataframe\"),\n    ]\n\n    def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:\n        if field_name == \"export_format\" and field_value == \"Markdown\":\n            build_config[\"md_image_placeholder\"][\"show\"] = True\n            build_config[\"md_page_break_placeholder\"][\"show\"] = True\n            build_config[\"image_mode\"][\"show\"] = True\n        elif field_name == \"export_format\" and field_value == \"HTML\":\n            build_config[\"md_image_placeholder\"][\"show\"] = False\n            build_config[\"md_page_break_placeholder\"][\"show\"] = False\n            build_config[\"image_mode\"][\"show\"] = True\n        elif field_name == \"export_format\" and field_value in {\"Plaintext\", \"DocTags\"}:\n            
build_config[\"md_image_placeholder\"][\"show\"] = False\n            build_config[\"md_page_break_placeholder\"][\"show\"] = False\n            build_config[\"image_mode\"][\"show\"] = False\n\n        return build_config\n\n    def export_document(self) -> list[Data]:\n        documents = extract_docling_documents(self.data_inputs, self.doc_key)\n\n        results: list[Data] = []\n        try:\n            image_mode = ImageRefMode(self.image_mode)\n            for doc in documents:\n                content = \"\"\n                if self.export_format == \"Markdown\":\n                    content = doc.export_to_markdown(\n                        image_mode=image_mode,\n                        image_placeholder=self.md_image_placeholder,\n                        page_break_placeholder=self.md_page_break_placeholder,\n                    )\n                elif self.export_format == \"HTML\":\n                    content = doc.export_to_html(image_mode=image_mode)\n                elif self.export_format == \"Plaintext\":\n                    content = doc.export_to_text()\n                elif self.export_format == \"DocTags\":\n                    content = doc.export_to_doctags()\n\n                results.append(Data(text=content))\n        except Exception as e:\n            msg = f\"Error splitting text: {e}\"\n            raise TypeError(msg) from e\n\n        return results\n\n    def as_dataframe(self) -> DataFrame:\n        return DataFrame(self.export_document())\n"
+              },
+              "data_inputs": {
+                "_input_type": "HandleInput",
+                "advanced": false,
+                "display_name": "Data or DataFrame",
+                "dynamic": false,
+                "info": "The data with documents to export.",
+                "input_types": [
+                  "Data",
+                  "DataFrame"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "name": "data_inputs",
+                "placeholder": "",
+                "required": true,
+                "show": true,
+                "title_case": false,
+                "trace_as_metadata": true,
+                "type": "other",
+                "value": ""
+              },
+              "doc_key": {
+                "_input_type": "MessageTextInput",
+                "advanced": true,
+                "display_name": "Doc Key",
+                "dynamic": false,
+                "info": "The key to use for the DoclingDocument column.",
+                "input_types": [
+                  "Message"
+                ],
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "doc_key",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_input": true,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "doc"
+              },
+              "export_format": {
+                "_input_type": "DropdownInput",
+                "advanced": false,
+                "combobox": false,
+                "dialog_inputs": {},
+                "display_name": "Export format",
+                "dynamic": false,
+                "info": "Select the export format to convert the input.",
+                "name": "export_format",
+                "options": [
+                  "Markdown",
+                  "HTML",
+                  "Plaintext",
+                  "DocTags"
+                ],
+                "options_metadata": [],
+                "placeholder": "",
+                "real_time_refresh": true,
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "toggle": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "Markdown"
+              },
+              "image_mode": {
+                "_input_type": "DropdownInput",
+                "advanced": false,
+                "combobox": false,
+                "dialog_inputs": {},
+                "display_name": "Image export mode",
+                "dynamic": false,
+                "info": "Specify how images are exported in the output. Placeholder will replace the images with a string, whereas Embedded will include them as base64 encoded images.",
+                "name": "image_mode",
+                "options": [
+                  "placeholder",
+                  "embedded"
+                ],
+                "options_metadata": [],
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "toggle": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "placeholder"
+              },
+              "md_image_placeholder": {
+                "_input_type": "StrInput",
+                "advanced": true,
+                "display_name": "Image placeholder",
+                "dynamic": false,
+                "info": "Specify the image placeholder for markdown exports.",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "md_image_placeholder",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": "<!-- image -->"
+              },
+              "md_page_break_placeholder": {
+                "_input_type": "StrInput",
+                "advanced": true,
+                "display_name": "Page break placeholder",
+                "dynamic": false,
+                "info": "Add this placeholder between pages in the markdown output.",
+                "list": false,
+                "list_add_label": "Add More",
+                "load_from_db": false,
+                "name": "md_page_break_placeholder",
+                "placeholder": "",
+                "required": false,
+                "show": true,
+                "title_case": false,
+                "tool_mode": false,
+                "trace_as_metadata": true,
+                "type": "str",
+                "value": ""
+              }
+            },
+            "tool_mode": false
+          },
+          "selected_output": "data",
+          "showNode": true,
+          "type": "ExportDoclingDocument"
+        },
+        "dragging": false,
+        "id": "ExportDoclingDocument-xFoCI",
+        "measured": {
+          "height": 347,
+          "width": 320
+        },
+        "position": {
+          "x": 1354.7013688969873,
+          "y": 1365.2986945152204
+        },
+        "selected": false,
+        "type": "genericNode"
+      }
+    ],
+    "viewport": {
+      "x": -708.9707113557265,
+      "y": -965.7967428241175,
+      "zoom": 0.7967811989815704
+    }
+  },
+  "description": "Load your data for chat context with Retrieval Augmented Generation.",
+  "endpoint_name": null,
+  "id": "1402618b-e6d1-4ff2-9a11-d6ce71186915",
+  "is_component": false,
+  "last_tested_version": "1.6.0",
+  "name": "OpenSearch Ingestion Flow Docling Serve",
+  "tags": [
+    "openai",
+    "astradb",
+    "rag",
+    "q-a"
+  ]
+}
\ No newline at end of file

From febebd7f77a70e6a47d97b76d3784f421fa60368 Mon Sep 17 00:00:00 2001
From: Mendon Kissling <59585235+mendonk@users.noreply.github.com>
Date: Tue, 23 Sep 2025 12:12:47 -0400
Subject: [PATCH 08/19] init

---
 docs/docs/get-started/intro.mdx           | 48 -----------------------
 docs/docs/get-started/what-is-openrag.mdx | 10 +++++
 docs/sidebars.js                          |  2 +-
 3 files changed, 11 insertions(+), 49 deletions(-)
 delete mode 100644 docs/docs/get-started/intro.mdx
 create mode 100644 docs/docs/get-started/what-is-openrag.mdx

diff --git a/docs/docs/get-started/intro.mdx b/docs/docs/get-started/intro.mdx
deleted file mode 100644
index 22763874..00000000
--- a/docs/docs/get-started/intro.mdx
+++ /dev/null
@@ -1,48 +0,0 @@
----
-title: What is OpenRAG?
-slug: /
----
-
-# OpenRAG Introduction
-
-Let's discover **Docusaurus in less than 5 minutes**.
-
-## Getting Started
-
-Get started by **creating a new site**.
-
-Or **try Docusaurus immediately** with **[docusaurus.new](https://docusaurus.new)**.
-
-### What you'll need
-
-- [Node.js](https://nodejs.org/en/download/) version 18.0 or above:
-  - When installing Node.js, you are recommended to check all checkboxes related to dependencies.
-
-## Generate a new site
-
-Generate a new Docusaurus site using the **classic template**.
-
-The classic template will automatically be added to your project after you run the command:
-
-```bash
-npm init docusaurus@latest my-website classic
-```
-
-You can type this command into Command Prompt, Powershell, Terminal, or any other integrated terminal of your code editor.
-
-The command also installs all necessary dependencies you need to run Docusaurus.
-
-## Start your site
-
-Run the development server:
-
-```bash
-cd my-website
-npm run start
-```
-
-The `cd` command changes the directory you're working with. In order to work with your newly created Docusaurus site, you'll need to navigate the terminal there.
-
-The `npm run start` command builds your website locally and serves it through a development server, ready for you to view at http://localhost:3000/.
-
-Open `docs/intro.md` (this page) and edit some lines: the site **reloads automatically** and displays your changes.
diff --git a/docs/docs/get-started/what-is-openrag.mdx b/docs/docs/get-started/what-is-openrag.mdx
new file mode 100644
index 00000000..f5b5eef7
--- /dev/null
+++ b/docs/docs/get-started/what-is-openrag.mdx
@@ -0,0 +1,10 @@
+---
+title: What is OpenRAG?
+slug: /what-is-openrag
+---
+
+OpenRAG exists at the confluence of three powerful open-source projects:
+
+* Langflow for agentic workflows
+* OpenSearch for vector databases
+* Docling for data ingestion from common sources like OneDrive, Google Drive, and AWS
\ No newline at end of file
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 51a4ddc3..568989e5 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -22,7 +22,7 @@ const sidebars = {
       items: [
         {
           type: "doc",
-          id: "get-started/intro",
+          id: "get-started/what-is-openrag",
           label: "Introduction"
         },
         {

From a8c8383259dbcb19d4dd51369dd3e56508de17ec Mon Sep 17 00:00:00 2001
From: Mendon Kissling <59585235+mendonk@users.noreply.github.com>
Date: Tue, 23 Sep 2025 13:21:33 -0400
Subject: [PATCH 09/19] what-is-openrag-init

---
 docs/docs/get-started/what-is-openrag.mdx | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/docs/docs/get-started/what-is-openrag.mdx b/docs/docs/get-started/what-is-openrag.mdx
index f5b5eef7..7b411617 100644
--- a/docs/docs/get-started/what-is-openrag.mdx
+++ b/docs/docs/get-started/what-is-openrag.mdx
@@ -3,8 +3,17 @@ title: What is OpenRAG?
 slug: /what-is-openrag
 ---
 
-OpenRAG exists at the confluence of three powerful open-source projects:
+OpenRAG is an open-source package for building agentic RAG systems.
+It supports integration with a wide range of orchestration tools, vector databases, and LLM providers.
 
-* Langflow for agentic workflows
-* OpenSearch for vector databases
-* Docling for data ingestion from common sources like OneDrive, Google Drive, and AWS
\ No newline at end of file
+OpenRAG connects and amplifies three popular, proven open-source projects into one powerful platform:
+
+* [Langflow](https://docs.langflow.org) - Langflow is a powerful tool to build and deploy AI agents and MCP servers. It supports all major LLMs, vector databases and a growing library of AI tools. 
+
+* [OpenSearch](https://docs.opensearch.org/latest/) - OpenSearch is a community-driven, Apache 2.0-licensed open source search and analytics suite that makes it easy to ingest, search, visualize, and analyze data.
+
+* [Docling](https://docling-project.github.io/docling/) - Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem. 
+
+OpenRAG builds on Langflow's familiar interface while adding OpenSearch for vector storage and Docling for simplified document parsing, with opinionated flows that serve as ready-to-use recipes for ingestion, retrieval, and generation from popular sources like OneDrive, Google Drive, and AWS. And don't fear: every part of the stack is swappable. Write your own custom components in Python, try different language models, and customize your flows to build an agentic RAG system that solves problems.
+
+Ready to get started? Install OpenRAG and then run the Quickstart to create a powerful RAG pipeline.
\ No newline at end of file

From 499969728b7aaeed7bb28d5acebd96896bd04aaf Mon Sep 17 00:00:00 2001
From: Mendon Kissling <59585235+mendonk@users.noreply.github.com>
Date: Tue, 23 Sep 2025 13:44:38 -0400
Subject: [PATCH 10/19] fix-base-path

---
 docs/docusaurus.config.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/docusaurus.config.js b/docs/docusaurus.config.js
index c4175c09..4248c3e2 100644
--- a/docs/docusaurus.config.js
+++ b/docs/docusaurus.config.js
@@ -71,7 +71,7 @@ const config = {
         logo: {
           alt: 'OpenRAG Logo',
           src: 'img/logo.svg',
-          href: '/',
+          href: 'what-is-openrag',
         },
         items: [
           {
@@ -89,7 +89,7 @@ const config = {
             items: [
               {
                 label: 'Getting Started',
-                to: '/',
+                to: 'what-is-openrag',
               },
             ],
           },

From 70da229e40fb9577f8320abd7841bf66a51c1fc6 Mon Sep 17 00:00:00 2001
From: Brent O'Neill <brent.oneill@datastax.com>
Date: Tue, 23 Sep 2025 11:53:58 -0600
Subject: [PATCH 11/19] commit and make select work

---
 frontend/src/app/knowledge/chunks/page.tsx | 60 ++++++++++++++++------
 1 file changed, 45 insertions(+), 15 deletions(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index b59a8760..9a889dae 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -39,6 +39,9 @@ function ChunksPageContent() {
   const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
     ChunkResult[]
   >([]);
+  const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
+
+  // Calculate average chunk length
   const averageChunkLength = useMemo(
     () =>
       chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
@@ -84,10 +87,34 @@ function ChunksPageContent() {
     setChunks(fileData?.chunks || []);
   }, [data, filename, fileData?.chunks]);
 
+  // Set selected state for all checkboxes when selectAll changes
+  useEffect(() => {
+    if (selectAll) {
+      setSelectedChunks(new Set(chunks.map((_, index) => index)));
+    } else {
+      setSelectedChunks(new Set());
+    }
+  }, [selectAll, setSelectedChunks, chunks]);
+
   const handleBack = useCallback(() => {
-    router.back();
+    router.push("/knowledge");
   }, [router]);
 
+  const handleChunkCardCheckboxChange = useCallback(
+    (index: number) => {
+      setSelectedChunks((prevSelected) => {
+        const newSelected = new Set(prevSelected);
+        if (newSelected.has(index)) {
+          newSelected.delete(index);
+        } else {
+          newSelected.add(index);
+        }
+        return newSelected;
+      });
+    },
+    [setSelectedChunks]
+  );
+
   if (!filename) {
     return (
       <div className="flex items-center justify-center h-64">
@@ -134,11 +161,13 @@ function ChunksPageContent() {
               <Checkbox
                 id="selectAllChunks"
                 checked={selectAll}
-                onCheckedChange={(checked) => setSelectAll(checked === true)}
+                onCheckedChange={(handleSelectAll) =>
+                  setSelectAll(!!handleSelectAll)
+                }
               />
               <Label
                 htmlFor="selectAllChunks"
-                className="font-medium text-muted-foreground whitespace-nowrap"
+                className="font-medium text-muted-foreground whitespace-nowrap cursor-pointer"
               >
                 Select all
               </Label>
@@ -192,7 +221,10 @@ function ChunksPageContent() {
                   <div className="flex items-center justify-between mb-2">
                     <div className="flex items-center gap-3">
                       <div>
-                        <Checkbox />
+                        <Checkbox
+                          checked={selectedChunks.has(index)}
+                          onClick={() => handleChunkCardCheckboxChange(index)}
+                        />
                       </div>
                       <span className="text-sm text-bold">
                         Chunk {chunk.page}
@@ -221,11 +253,9 @@ function ChunksPageContent() {
                       Active
                     </span> */}
                   </div>
-                  <div>
-                    <blockquote className="text-sm text-muted-foreground leading-relaxed border-l-2 border-color-input ml-1.5 pl-4">
-                      {chunk.text}
-                    </blockquote>
-                  </div>
+                  <blockquote className="text-sm text-muted-foreground leading-relaxed border-l-2 border-input ml-1.5 pl-4">
+                    {chunk.text}
+                  </blockquote>
                 </div>
               ))}
             </div>
@@ -249,18 +279,17 @@ function ChunksPageContent() {
                 {averageChunkLength.toFixed(0)} chars
               </dd>
             </div>
-            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+            {/* TODO: Uncomment after data is available */}
+            {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Process time</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
-                {/* {averageChunkLength.toFixed(0)} chars */}
               </dd>
             </div>
             <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Model</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
-                {/* {averageChunkLength.toFixed(0)} chars */}
               </dd>
-            </div>
+            </div> */}
           </dl>
         </div>
         <div className="mb-8">
@@ -292,10 +321,11 @@ function ChunksPageContent() {
                 N/A
               </dd>
             </div>
-            <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
+            {/* TODO: Uncomment after data is available */}
+            {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Source</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
-            </div>
+            </div> */}
             <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
               <dt className="text-sm/6 text-muted-foreground">Updated</dt>
               <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">

From a425ba62fbcc34bb3b10281a44ba48edc466fe3e Mon Sep 17 00:00:00 2001
From: boneill-ds <brent.oneill@datastax.com>
Date: Tue, 23 Sep 2025 13:24:25 -0600
Subject: [PATCH 12/19] Update frontend/src/app/knowledge/chunks/page.tsx

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 frontend/src/app/knowledge/chunks/page.tsx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 9a889dae..c55690c5 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -26,6 +26,7 @@ const getFileTypeLabel = (mimetype: string) => {
   if (mimetype === "application/pdf") return "PDF";
   if (mimetype === "text/plain") return "Text";
   if (mimetype === "application/msword") return "Word Document";
+  return "Unknown";
 };
 
 function ChunksPageContent() {

From 3cb33526dbf1fd9f28a89e2d8895911c027b4a2d Mon Sep 17 00:00:00 2001
From: boneill-ds <brent.oneill@datastax.com>
Date: Tue, 23 Sep 2025 13:24:41 -0600
Subject: [PATCH 13/19] Update frontend/src/app/knowledge/chunks/page.tsx

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 frontend/src/app/knowledge/chunks/page.tsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index c55690c5..d538ce36 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -62,8 +62,8 @@ function ChunksPageContent() {
     if (queryInputText === "") {
       setChunksFilteredByQuery(chunks);
     } else {
-      setChunksFilteredByQuery((prevChunks) =>
-        prevChunks.filter((chunk) =>
+      setChunksFilteredByQuery(
+        chunks.filter((chunk) =>
           chunk.text.toLowerCase().includes(queryInputText.toLowerCase())
         )
       );

From c33b5bcd4ed265c3e3d57d9b1436fdd2121a1e77 Mon Sep 17 00:00:00 2001
From: boneill-ds <brent.oneill@datastax.com>
Date: Tue, 23 Sep 2025 13:24:49 -0600
Subject: [PATCH 14/19] Update frontend/src/app/knowledge/chunks/page.tsx

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 frontend/src/app/knowledge/chunks/page.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index d538ce36..52b4bcfd 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -86,7 +86,7 @@ function ChunksPageContent() {
     }
 
     setChunks(fileData?.chunks || []);
-  }, [data, filename, fileData?.chunks]);
+  }, [data, filename]);
 
   // Set selected state for all checkboxes when selectAll changes
   useEffect(() => {

From 9caebae22e03b2c795d85dde3f0326c99280ab7b Mon Sep 17 00:00:00 2001
From: boneill-ds <brent.oneill@datastax.com>
Date: Tue, 23 Sep 2025 13:24:57 -0600
Subject: [PATCH 15/19] Update frontend/src/app/knowledge/chunks/page.tsx

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 frontend/src/app/knowledge/chunks/page.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 52b4bcfd..73a687b1 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -227,7 +227,7 @@ function ChunksPageContent() {
                           onClick={() => handleChunkCardCheckboxChange(index)}
                         />
                       </div>
-                      <span className="text-sm text-bold">
+                      <span className="text-sm font-bold">
                         Chunk {chunk.page}
                       </span>
                       <span className="bg-background p-1 rounded text-xs text-muted-foreground/70">

From 5a473541ad0ce934af4c879156745d30c608133f Mon Sep 17 00:00:00 2001
From: boneill-ds <brent.oneill@datastax.com>
Date: Tue, 23 Sep 2025 13:25:40 -0600
Subject: [PATCH 16/19] Update frontend/src/app/knowledge/chunks/page.tsx

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 frontend/src/app/knowledge/chunks/page.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 73a687b1..7de98830 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -224,7 +224,7 @@ function ChunksPageContent() {
                       <div>
                         <Checkbox
                           checked={selectedChunks.has(index)}
-                          onClick={() => handleChunkCardCheckboxChange(index)}
+                          onCheckedChange={(checked) => handleChunkCardCheckboxChange(checked, index)}
                         />
                       </div>
                       <span className="text-sm font-bold">

From 1808ccc149c3dccc490e630b10d10d55d935bf6a Mon Sep 17 00:00:00 2001
From: Brent O'Neill <brent.oneill@datastax.com>
Date: Tue, 23 Sep 2025 13:59:35 -0600
Subject: [PATCH 17/19] fix selection

---
 frontend/src/app/knowledge/chunks/page.tsx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx
index 7de98830..cdc9fcc3 100644
--- a/frontend/src/app/knowledge/chunks/page.tsx
+++ b/frontend/src/app/knowledge/chunks/page.tsx
@@ -224,7 +224,9 @@ function ChunksPageContent() {
                       <div>
                         <Checkbox
                           checked={selectedChunks.has(index)}
-                          onCheckedChange={(checked) => handleChunkCardCheckboxChange(checked, index)}
+                          onCheckedChange={() =>
+                            handleChunkCardCheckboxChange(index)
+                          }
                         />
                       </div>
                       <span className="text-sm font-bold">

From 917b078c20cb810565b114e30cb9af422f8ec8fd Mon Sep 17 00:00:00 2001
From: Mike Fortman <michael.fortman@datastax.com>
Date: Tue, 23 Sep 2025 16:01:02 -0500
Subject: [PATCH 18/19] old tweaks cleanup

---
 frontend/src/app/settings/page.tsx |  2 ++
 src/agent.py                       | 18 +-----------------
 src/api/settings.py                | 16 +---------------
 src/services/chat_service.py       | 11 -----------
 4 files changed, 4 insertions(+), 43 deletions(-)

diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx
index 50dc7867..eea555c2 100644
--- a/frontend/src/app/settings/page.tsx
+++ b/frontend/src/app/settings/page.tsx
@@ -152,6 +152,7 @@ function KnowledgeSourcesPage() {
     },
   });
 
+
   // Debounced update function
   const debouncedUpdate = useDebounce(
     (variables: Parameters<typeof updateFlowSettingMutation.mutate>[0]) => {
@@ -219,6 +220,7 @@ function KnowledgeSourcesPage() {
   // Update processing mode
   const handleProcessingModeChange = (mode: string) => {
     setProcessingMode(mode);
+    // Update the configuration setting (backend will also update the flow automatically)
     debouncedUpdate({ doclingPresets: mode });
   };
 
diff --git a/src/agent.py b/src/agent.py
index ab99f597..1eb362bd 100644
--- a/src/agent.py
+++ b/src/agent.py
@@ -106,7 +106,6 @@ async def async_response_stream(
     model: str,
     extra_headers: dict = None,
     previous_response_id: str = None,
-    tweaks: dict = None,
     log_prefix: str = "response",
 ):
     logger.info("User prompt received", prompt=prompt)
@@ -121,8 +120,6 @@ async def async_response_stream(
         }
         if previous_response_id is not None:
             request_params["previous_response_id"] = previous_response_id
-        if tweaks:
-            request_params["tweaks"] = tweaks
 
         if "x-api-key" not in client.default_headers:
             if hasattr(client, "api_key") and extra_headers is not None:
@@ -199,7 +196,6 @@ async def async_response(
     model: str,
     extra_headers: dict = None,
     previous_response_id: str = None,
-    tweaks: dict = None,
     log_prefix: str = "response",
 ):
     try:
@@ -214,8 +210,6 @@ async def async_response(
         }
         if previous_response_id is not None:
             request_params["previous_response_id"] = previous_response_id
-        if tweaks:
-            request_params["tweaks"] = tweaks
         if extra_headers:
             request_params["extra_headers"] = extra_headers
 
@@ -249,7 +243,6 @@ async def async_stream(
     model: str,
     extra_headers: dict = None,
     previous_response_id: str = None,
-    tweaks: dict = None,
     log_prefix: str = "response",
 ):
     async for chunk in async_response_stream(
@@ -258,7 +251,6 @@ async def async_stream(
         model,
         extra_headers=extra_headers,
         previous_response_id=previous_response_id,
-        tweaks=tweaks,
         log_prefix=log_prefix,
     ):
         yield chunk
@@ -271,7 +263,6 @@ async def async_langflow(
     prompt: str,
     extra_headers: dict = None,
     previous_response_id: str = None,
-    tweaks: dict = None,
 ):
     response_text, response_id, response_obj = await async_response(
         langflow_client,
@@ -279,7 +270,6 @@ async def async_langflow(
         flow_id,
         extra_headers=extra_headers,
         previous_response_id=previous_response_id,
-        tweaks=tweaks,
         log_prefix="langflow",
     )
     return response_text, response_id
@@ -292,7 +282,6 @@ async def async_langflow_stream(
     prompt: str,
     extra_headers: dict = None,
     previous_response_id: str = None,
-    tweaks: dict = None,
 ):
     logger.debug("Starting langflow stream", prompt=prompt)
     try:
@@ -302,8 +291,7 @@ async def async_langflow_stream(
             flow_id,
             extra_headers=extra_headers,
             previous_response_id=previous_response_id,
-            tweaks=tweaks,
-            log_prefix="langflow",
+                log_prefix="langflow",
         ):
             logger.debug(
                 "Yielding chunk from langflow stream",
@@ -463,7 +451,6 @@ async def async_langflow_chat(
     user_id: str,
     extra_headers: dict = None,
     previous_response_id: str = None,
-    tweaks: dict = None,
     store_conversation: bool = True,
 ):
     logger.debug(
@@ -497,7 +484,6 @@ async def async_langflow_chat(
         flow_id,
         extra_headers=extra_headers,
         previous_response_id=previous_response_id,
-        tweaks=tweaks,
         log_prefix="langflow",
     )
     logger.debug(
@@ -576,7 +562,6 @@ async def async_langflow_chat_stream(
     user_id: str,
     extra_headers: dict = None,
     previous_response_id: str = None,
-    tweaks: dict = None,
 ):
     logger.debug(
         "async_langflow_chat_stream called",
@@ -603,7 +588,6 @@ async def async_langflow_chat_stream(
         flow_id,
         extra_headers=extra_headers,
         previous_response_id=previous_response_id,
-        tweaks=tweaks,
         log_prefix="langflow",
     ):
         # Extract text content to build full response for history
diff --git a/src/api/settings.py b/src/api/settings.py
index 9723cdeb..560eb400 100644
--- a/src/api/settings.py
+++ b/src/api/settings.py
@@ -47,22 +47,7 @@ def get_docling_preset_configs():
     }
 
 
-def get_docling_tweaks(docling_preset: str = None) -> dict:
-    """Get Langflow tweaks for docling component based on preset"""
-    if not docling_preset:
-        # Get current preset from config
-        openrag_config = get_openrag_config()
-        docling_preset = openrag_config.knowledge.doclingPresets
 
-    preset_configs = get_docling_preset_configs()
-
-    if docling_preset not in preset_configs:
-        docling_preset = "standard"  # fallback
-
-    preset_config = preset_configs[docling_preset]
-    docling_serve_opts = json.dumps(preset_config)
-
-    return {"DoclingRemote-ayRdw": {"docling_serve_opts": docling_serve_opts}}
 
 
 async def get_settings(request, session_manager):
@@ -626,3 +611,4 @@ async def update_docling_preset(request, session_manager):
             {"error": f"Failed to update docling preset: {str(e)}"},
             status_code=500
         )
+
diff --git a/src/services/chat_service.py b/src/services/chat_service.py
index 4b3c9d26..5ffe30f9 100644
--- a/src/services/chat_service.py
+++ b/src/services/chat_service.py
@@ -2,7 +2,6 @@ import json
 from config.settings import NUDGES_FLOW_ID, clients, LANGFLOW_URL, LANGFLOW_CHAT_FLOW_ID
 from agent import async_chat, async_langflow, async_chat_stream
 from auth_context import set_auth_context
-from api.settings import get_docling_tweaks
 from utils.logging_config import get_logger
 
 logger = get_logger(__name__)
@@ -127,8 +126,6 @@ class ChatService:
                 "Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
             )
 
-        # Get docling tweaks based on current configuration
-        docling_tweaks = get_docling_tweaks()
 
         if stream:
             from agent import async_langflow_chat_stream
@@ -140,7 +137,6 @@ class ChatService:
                 user_id,
                 extra_headers=extra_headers,
                 previous_response_id=previous_response_id,
-                tweaks=docling_tweaks,
             )
         else:
             from agent import async_langflow_chat
@@ -152,7 +148,6 @@ class ChatService:
                 user_id,
                 extra_headers=extra_headers,
                 previous_response_id=previous_response_id,
-                tweaks=docling_tweaks,
             )
             response_data = {"response": response_text}
             if response_id:
@@ -202,8 +197,6 @@ class ChatService:
 
         from agent import async_langflow_chat
 
-        # Get docling tweaks (might not be used by nudges flow, but keeping consistent)
-        docling_tweaks = get_docling_tweaks()
 
         response_text, response_id = await async_langflow_chat(
             langflow_client,
@@ -211,7 +204,6 @@ class ChatService:
             prompt,
             user_id,
             extra_headers=extra_headers,
-            tweaks=docling_tweaks,
             store_conversation=False,
         )
         response_data = {"response": response_text}
@@ -242,8 +234,6 @@ class ChatService:
                 raise ValueError(
                     "Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY."
                 )
-            # Get docling tweaks based on current configuration
-            docling_tweaks = get_docling_tweaks()
 
             response_text, response_id = await async_langflow(
                 langflow_client=langflow_client,
@@ -251,7 +241,6 @@ class ChatService:
                 prompt=document_prompt,
                 extra_headers=extra_headers,
                 previous_response_id=previous_response_id,
-                tweaks=docling_tweaks,
             )
         else:  # chat
             # Set auth context for chat tools and provide user_id

From be8e13a173206a72a55dc0d805def0dd1b34a531 Mon Sep 17 00:00:00 2001
From: Deon Sanchez <69873175+deon-sanchez@users.noreply.github.com>
Date: Wed, 24 Sep 2025 07:27:59 -0600
Subject: [PATCH 19/19] feat: add knowledge status (#53)

* feat: add status handling and visual indicators for file statuses

* refactor: comment out status field and related rendering logic in SearchPage

* format

* add timeout on mutation delete document

* make file fields be optional

* fetch task files and display them on knowledge page

* add tasks to files inside task context

* added failed to status badge

* added files on get all tasks on backend

* Changed models to get parameters from settings if not provided

* changed settings page to get models when in no-auth mode

* fixed openai allowing validation even when value is not present

* removed unused console log

---------

Co-authored-by: Lucas Oliveira <lucas.edu.oli@hotmail.com>
Co-authored-by: Mike Fortman <michael.fortman@datastax.com>
---
 .../app/api/mutations/useDeleteDocument.ts    |   8 +-
 .../src/app/api/queries/useGetModelsQuery.ts  |   2 +-
 .../src/app/api/queries/useGetSearchQuery.ts  |  27 ++--
 frontend/src/app/knowledge/page.tsx           |  77 +++++++---
 frontend/src/app/settings/page.tsx            | 110 +++++++++----
 .../ui/animated-processing-icon.tsx           |  49 ++++++
 frontend/src/components/ui/status-badge.tsx   |  58 +++++++
 frontend/src/contexts/task-context.tsx        | 145 ++++++++++++++++--
 src/api/models.py                             |  97 ++++++++++--
 src/services/task_service.py                  |  66 ++++++--
 10 files changed, 529 insertions(+), 110 deletions(-)
 create mode 100644 frontend/src/components/ui/animated-processing-icon.tsx
 create mode 100644 frontend/src/components/ui/status-badge.tsx

diff --git a/frontend/src/app/api/mutations/useDeleteDocument.ts b/frontend/src/app/api/mutations/useDeleteDocument.ts
index 78985498..47b852b1 100644
--- a/frontend/src/app/api/mutations/useDeleteDocument.ts
+++ b/frontend/src/app/api/mutations/useDeleteDocument.ts
@@ -14,7 +14,7 @@ interface DeleteDocumentResponse {
 }
 
 const deleteDocument = async (
-  data: DeleteDocumentRequest
+  data: DeleteDocumentRequest,
 ): Promise<DeleteDocumentResponse> => {
   const response = await fetch("/api/documents/delete-by-filename", {
     method: "POST",
@@ -37,9 +37,11 @@ export const useDeleteDocument = () => {
 
   return useMutation({
     mutationFn: deleteDocument,
-    onSuccess: () => {
+    onSettled: () => {
       // Invalidate and refetch search queries to update the UI
-      queryClient.invalidateQueries({ queryKey: ["search"] });
+      setTimeout(() => {
+        queryClient.invalidateQueries({ queryKey: ["search"] });
+      }, 1000);
     },
   });
 };
diff --git a/frontend/src/app/api/queries/useGetModelsQuery.ts b/frontend/src/app/api/queries/useGetModelsQuery.ts
index cd24131b..4ce55bd3 100644
--- a/frontend/src/app/api/queries/useGetModelsQuery.ts
+++ b/frontend/src/app/api/queries/useGetModelsQuery.ts
@@ -54,7 +54,7 @@ export const useGetOpenAIModelsQuery = (
       queryKey: ["models", "openai", params],
       queryFn: getOpenAIModels,
       retry: 2,
-      enabled: options?.enabled !== false, // Allow enabling/disabling from options
+      enabled: !!params?.apiKey,
       staleTime: 0, // Always fetch fresh data
       gcTime: 0, // Don't cache results
       ...options,
diff --git a/frontend/src/app/api/queries/useGetSearchQuery.ts b/frontend/src/app/api/queries/useGetSearchQuery.ts
index 9928af3d..37798ce5 100644
--- a/frontend/src/app/api/queries/useGetSearchQuery.ts
+++ b/frontend/src/app/api/queries/useGetSearchQuery.ts
@@ -34,21 +34,28 @@ export interface ChunkResult {
 export interface File {
   filename: string;
   mimetype: string;
-  chunkCount: number;
-  avgScore: number;
+  chunkCount?: number;
+  avgScore?: number;
   source_url: string;
-  owner: string;
-  owner_name: string;
-  owner_email: string;
+  owner?: string;
+  owner_name?: string;
+  owner_email?: string;
   size: number;
   connector_type: string;
-  chunks: ChunkResult[];
+  status?:
+    | "processing"
+    | "active"
+    | "unavailable"
+    | "failed"
+    | "hidden"
+    | "sync";
+  chunks?: ChunkResult[];
 }
 
 export const useGetSearchQuery = (
   query: string,
   queryData?: ParsedQueryData | null,
-  options?: Omit<UseQueryOptions, "queryKey" | "queryFn">
+  options?: Omit<UseQueryOptions, "queryKey" | "queryFn">,
 ) => {
   const queryClient = useQueryClient();
 
@@ -149,7 +156,7 @@ export const useGetSearchQuery = (
         }
       });
 
-      const files: File[] = Array.from(fileMap.values()).map(file => ({
+      const files: File[] = Array.from(fileMap.values()).map((file) => ({
         filename: file.filename,
         mimetype: file.mimetype,
         chunkCount: file.chunks.length,
@@ -173,11 +180,11 @@ export const useGetSearchQuery = (
   const queryResult = useQuery(
     {
       queryKey: ["search", effectiveQuery],
-      placeholderData: prev => prev,
+      placeholderData: (prev) => prev,
       queryFn: getFiles,
       ...options,
     },
-    queryClient
+    queryClient,
   );
 
   return queryResult;
diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx
index ee116a71..5155f4e2 100644
--- a/frontend/src/app/knowledge/page.tsx
+++ b/frontend/src/app/knowledge/page.tsx
@@ -1,16 +1,10 @@
 "use client";
 
-import {
-  Building2,
-  Cloud,
-  HardDrive,
-  Search,
-  Trash2,
-  X,
-} from "lucide-react";
-import { AgGridReact, CustomCellRendererProps } from "ag-grid-react";
-import { useCallback, useState, useRef, ChangeEvent } from "react";
+import type { ColDef } from "ag-grid-community";
+import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
+import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react";
 import { useRouter } from "next/navigation";
+import { type ChangeEvent, useCallback, useRef, useState } from "react";
 import { SiGoogledrive } from "react-icons/si";
 import { TbBrandOnedrive } from "react-icons/tb";
 import { KnowledgeDropdown } from "@/components/knowledge-dropdown";
@@ -19,13 +13,13 @@ import { Button } from "@/components/ui/button";
 import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
 import { useTask } from "@/contexts/task-context";
 import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery";
-import { ColDef } from "ag-grid-community";
 import "@/components/AgGrid/registerAgGridModules";
 import "@/components/AgGrid/agGridStyles.css";
+import { toast } from "sonner";
 import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown";
+import { StatusBadge } from "@/components/ui/status-badge";
 import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog";
 import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
-import { toast } from "sonner";
 
 // Function to get the appropriate icon for a connector type
 function getSourceIcon(connectorType?: string) {
@@ -51,7 +45,7 @@ function getSourceIcon(connectorType?: string) {
 
 function SearchPage() {
   const router = useRouter();
-  const { isMenuOpen } = useTask();
+  const { isMenuOpen, files: taskFiles } = useTask();
   const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
     useKnowledgeFilter();
   const [selectedRows, setSelectedRows] = useState<File[]>([]);
@@ -61,14 +55,38 @@ function SearchPage() {
 
   const { data = [], isFetching } = useGetSearchQuery(
     parsedFilterData?.query || "*",
-    parsedFilterData
+    parsedFilterData,
   );
 
   const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
     gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
   };
 
-  const fileResults = data as File[];
+  // Convert TaskFiles to File format and merge with backend results
+  const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
+    return {
+      filename: taskFile.filename,
+      mimetype: taskFile.mimetype,
+      source_url: taskFile.source_url,
+      size: taskFile.size,
+      connector_type: taskFile.connector_type,
+      status: taskFile.status,
+    };
+  });
+
+  const backendFiles = data as File[];
+
+  const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
+    return (
+      taskFile.status !== "active" &&
+      !backendFiles.some(
+        (backendFile) => backendFile.filename === taskFile.filename,
+      )
+    );
+  });
+
+  // Combine backend files first, then task files not already returned by the backend
+  const fileResults = [...backendFiles, ...filteredTaskFiles];
 
   const gridRef = useRef<AgGridReact>(null);
 
@@ -82,13 +100,14 @@ function SearchPage() {
       minWidth: 220,
       cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => {
         return (
-          <div
-            className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors"
+          <button
+            type="button"
+            className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left w-full"
             onClick={() => {
               router.push(
                 `/knowledge/chunks?filename=${encodeURIComponent(
-                  data?.filename ?? ""
-                )}`
+                  data?.filename ?? "",
+                )}`,
               );
             }}
           >
@@ -96,7 +115,7 @@ function SearchPage() {
             <span className="font-medium text-foreground truncate">
               {value}
             </span>
-          </div>
+          </button>
         );
       },
     },
@@ -119,6 +138,7 @@ function SearchPage() {
     {
       field: "chunkCount",
       headerName: "Chunks",
+      valueFormatter: (params) => params.data?.chunkCount?.toString() || "-",
     },
     {
       field: "avgScore",
@@ -127,11 +147,20 @@ function SearchPage() {
       cellRenderer: ({ value }: CustomCellRendererProps<File>) => {
         return (
           <span className="text-xs text-green-400 bg-green-400/20 px-2 py-1 rounded">
-            {value.toFixed(2)}
+            {value?.toFixed(2) ?? "-"}
           </span>
         );
       },
     },
+    {
+      field: "status",
+      headerName: "Status",
+      cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
+        // Default to 'active' status if no status is provided
+        const status = data?.status || "active";
+        return <StatusBadge status={status} />;
+      },
+    },
     {
       cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
         return <KnowledgeActionsDropdown filename={data?.filename || ""} />;
@@ -172,7 +201,7 @@ function SearchPage() {
     try {
       // Delete each file individually since the API expects one filename at a time
       const deletePromises = selectedRows.map((row) =>
-        deleteDocumentMutation.mutateAsync({ filename: row.filename })
+        deleteDocumentMutation.mutateAsync({ filename: row.filename }),
       );
 
       await Promise.all(deletePromises);
@@ -180,7 +209,7 @@ function SearchPage() {
       toast.success(
         `Successfully deleted ${selectedRows.length} document${
           selectedRows.length > 1 ? "s" : ""
-        }`
+        }`,
       );
       setSelectedRows([]);
       setShowBulkDeleteDialog(false);
@@ -193,7 +222,7 @@ function SearchPage() {
       toast.error(
         error instanceof Error
           ? error.message
-          : "Failed to delete some documents"
+          : "Failed to delete some documents",
       );
     }
   };
diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx
index eea555c2..f49ff393 100644
--- a/frontend/src/app/settings/page.tsx
+++ b/frontend/src/app/settings/page.tsx
@@ -4,11 +4,13 @@ import { Loader2, PlugZap, RefreshCw } from "lucide-react";
 import { useSearchParams } from "next/navigation";
 import { Suspense, useCallback, useEffect, useState } from "react";
 import { useUpdateFlowSettingMutation } from "@/app/api/mutations/useUpdateFlowSettingMutation";
+import {
+  useGetIBMModelsQuery,
+  useGetOllamaModelsQuery,
+  useGetOpenAIModelsQuery,
+} from "@/app/api/queries/useGetModelsQuery";
 import { useGetSettingsQuery } from "@/app/api/queries/useGetSettingsQuery";
-import { useGetOpenAIModelsQuery, useGetOllamaModelsQuery, useGetIBMModelsQuery } from "@/app/api/queries/useGetModelsQuery";
 import { ConfirmationDialog } from "@/components/confirmation-dialog";
-import { ModelSelectItems } from "./helpers/model-select-item";
-import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
 import { ProtectedRoute } from "@/components/protected-route";
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
@@ -33,6 +35,8 @@ import { Textarea } from "@/components/ui/textarea";
 import { useAuth } from "@/contexts/auth-context";
 import { useTask } from "@/contexts/task-context";
 import { useDebounce } from "@/lib/debounce";
+import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
+import { ModelSelectItems } from "./helpers/model-select-item";
 
 const MAX_SYSTEM_PROMPT_CHARS = 2000;
 
@@ -105,42 +109,46 @@ function KnowledgeSourcesPage() {
 
   // Fetch settings using React Query
   const { data: settings = {} } = useGetSettingsQuery({
-    enabled: isAuthenticated,
+    enabled: isAuthenticated || isNoAuthMode,
   });
 
   // Get the current provider from settings
-  const currentProvider = (settings.provider?.model_provider || 'openai') as ModelProvider;
+  const currentProvider = (settings.provider?.model_provider ||
+    "openai") as ModelProvider;
 
   // Fetch available models based on provider
   const { data: openaiModelsData } = useGetOpenAIModelsQuery(
     undefined, // Let backend use stored API key from configuration
     {
-      enabled: isAuthenticated && currentProvider === 'openai',
-    }
+      enabled:
+        (isAuthenticated || isNoAuthMode) && currentProvider === "openai",
+    },
   );
 
   const { data: ollamaModelsData } = useGetOllamaModelsQuery(
     undefined, // No params for now, could be extended later
     {
-      enabled: isAuthenticated && currentProvider === 'ollama',
-    }
+      enabled:
+        (isAuthenticated || isNoAuthMode) && currentProvider === "ollama",
+    },
   );
 
   const { data: ibmModelsData } = useGetIBMModelsQuery(
     undefined, // No params for now, could be extended later
     {
-      enabled: isAuthenticated && currentProvider === 'ibm',
-    }
+      enabled: (isAuthenticated || isNoAuthMode) && currentProvider === "ibm",
+    },
   );
 
   // Select the appropriate models data based on provider
-  const modelsData = currentProvider === 'openai'
-    ? openaiModelsData
-    : currentProvider === 'ollama'
-    ? ollamaModelsData
-    : currentProvider === 'ibm'
-    ? ibmModelsData
-    : openaiModelsData; // fallback to openai
+  const modelsData =
+    currentProvider === "openai"
+      ? openaiModelsData
+      : currentProvider === "ollama"
+      ? ollamaModelsData
+      : currentProvider === "ibm"
+      ? ibmModelsData
+      : openaiModelsData; // fallback to openai
 
   // Mutations
   const updateFlowSettingMutation = useUpdateFlowSettingMutation({
@@ -152,7 +160,6 @@ function KnowledgeSourcesPage() {
     },
   });
 
-
   // Debounced update function
   const debouncedUpdate = useDebounce(
     (variables: Parameters<typeof updateFlowSettingMutation.mutate>[0]) => {
@@ -224,7 +231,6 @@ function KnowledgeSourcesPage() {
     debouncedUpdate({ doclingPresets: mode });
   };
 
-
   // Helper function to get connector icon
   const getConnectorIcon = useCallback((iconName: string) => {
     const iconMap: { [key: string]: React.ReactElement } = {
@@ -613,7 +619,11 @@ function KnowledgeSourcesPage() {
                 Language Model
               </Label>
               <Select
-                value={settings.agent?.llm_model || modelsData?.language_models?.find(m => m.default)?.value || "gpt-4"}
+                value={
+                  settings.agent?.llm_model ||
+                  modelsData?.language_models?.find((m) => m.default)?.value ||
+                  "gpt-4"
+                }
                 onValueChange={handleModelChange}
               >
                 <SelectTrigger id="model-select">
@@ -638,10 +648,20 @@ function KnowledgeSourcesPage() {
                 value={systemPrompt}
                 onChange={(e) => setSystemPrompt(e.target.value)}
                 rows={6}
-                className={`resize-none ${systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS ? 'border-red-500 focus:border-red-500' : ''}`}
+                className={`resize-none ${
+                  systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
+                    ? "border-red-500 focus:border-red-500"
+                    : ""
+                }`}
               />
               <div className="flex justify-start">
-                <span className={`text-xs ${systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS ? 'text-red-500' : 'text-muted-foreground'}`}>
+                <span
+                  className={`text-xs ${
+                    systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
+                      ? "text-red-500"
+                      : "text-muted-foreground"
+                  }`}
+                >
                   {systemPrompt.length}/{MAX_SYSTEM_PROMPT_CHARS} characters
                 </span>
               </div>
@@ -649,7 +669,10 @@ function KnowledgeSourcesPage() {
             <div className="flex justify-end pt-2">
               <Button
                 onClick={handleSystemPromptSave}
-                disabled={updateFlowSettingMutation.isPending || systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS}
+                disabled={
+                  updateFlowSettingMutation.isPending ||
+                  systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
+                }
                 className="min-w-[120px]"
                 size="sm"
                 variant="outline"
@@ -736,7 +759,9 @@ function KnowledgeSourcesPage() {
               </Label>
               <Select
                 value={
-                  settings.knowledge?.embedding_model || modelsData?.embedding_models?.find(m => m.default)?.value || "text-embedding-ada-002"
+                  settings.knowledge?.embedding_model ||
+                  modelsData?.embedding_models?.find((m) => m.default)?.value ||
+                  "text-embedding-ada-002"
                 }
                 onValueChange={handleEmbeddingModelChange}
               >
@@ -746,7 +771,9 @@ function KnowledgeSourcesPage() {
                 <SelectContent>
                   <ModelSelectItems
                     models={modelsData?.embedding_models}
-                    fallbackModels={getFallbackModels(currentProvider).embedding}
+                    fallbackModels={
+                      getFallbackModels(currentProvider).embedding
+                    }
                     provider={currentProvider}
                   />
                 </SelectContent>
@@ -807,7 +834,10 @@ function KnowledgeSourcesPage() {
                 <div className="flex items-center space-x-3">
                   <RadioGroupItem value="standard" id="standard" />
                   <div className="flex-1">
-                    <Label htmlFor="standard" className="text-base font-medium cursor-pointer">
+                    <Label
+                      htmlFor="standard"
+                      className="text-base font-medium cursor-pointer"
+                    >
                       Standard
                     </Label>
                     <div className="text-sm text-muted-foreground">
@@ -818,18 +848,28 @@ function KnowledgeSourcesPage() {
                 <div className="flex items-center space-x-3">
                   <RadioGroupItem value="ocr" id="ocr" />
                   <div className="flex-1">
-                    <Label htmlFor="ocr" className="text-base font-medium cursor-pointer">
+                    <Label
+                      htmlFor="ocr"
+                      className="text-base font-medium cursor-pointer"
+                    >
                       Extract text from images
                     </Label>
                     <div className="text-sm text-muted-foreground">
-                      Uses OCR to extract text from images/PDFs. Ingest is slower when enabled
+                      Uses OCR to extract text from images/PDFs. Ingest is
+                      slower when enabled
                     </div>
                   </div>
                 </div>
                 <div className="flex items-center space-x-3">
-                  <RadioGroupItem value="picture_description" id="picture_description" />
+                  <RadioGroupItem
+                    value="picture_description"
+                    id="picture_description"
+                  />
                   <div className="flex-1">
-                    <Label htmlFor="picture_description" className="text-base font-medium cursor-pointer">
+                    <Label
+                      htmlFor="picture_description"
+                      className="text-base font-medium cursor-pointer"
+                    >
                       Generate Description
                     </Label>
                     <div className="text-sm text-muted-foreground">
@@ -840,11 +880,15 @@ function KnowledgeSourcesPage() {
                 <div className="flex items-center space-x-3">
                   <RadioGroupItem value="VLM" id="VLM" />
                   <div className="flex-1">
-                    <Label htmlFor="VLM" className="text-base font-medium cursor-pointer">
+                    <Label
+                      htmlFor="VLM"
+                      className="text-base font-medium cursor-pointer"
+                    >
                       AI Vision
                     </Label>
                     <div className="text-sm text-muted-foreground">
-                      Advanced processing with vision language models. Highest quality but most expensive
+                      Advanced processing with vision language models. Highest
+                      quality but most expensive
                     </div>
                   </div>
                 </div>
diff --git a/frontend/src/components/ui/animated-processing-icon.tsx b/frontend/src/components/ui/animated-processing-icon.tsx
new file mode 100644
index 00000000..eb36b2ab
--- /dev/null
+++ b/frontend/src/components/ui/animated-processing-icon.tsx
@@ -0,0 +1,49 @@
+interface AnimatedProcessingIconProps {
+  className?: string;
+  size?: number;
+}
+
+export const AnimatedProcessingIcon = ({
+  className = "",
+  size = 10,
+}: AnimatedProcessingIconProps) => {
+  const width = Math.round((size * 6) / 10);
+  const height = size;
+
+  return (
+    <svg
+      width={width}
+      height={height}
+      viewBox="0 0 6 10"
+      fill="none"
+      xmlns="http://www.w3.org/2000/svg"
+      className={className}
+    >
+      <style>
+        {`
+          .dot-1 { animation: pulse-wave 1.5s infinite; animation-delay: 0s; }
+          .dot-2 { animation: pulse-wave 1.5s infinite; animation-delay: 0.1s; }
+          .dot-3 { animation: pulse-wave 1.5s infinite; animation-delay: 0.2s; }
+          .dot-4 { animation: pulse-wave 1.5s infinite; animation-delay: 0.3s; }
+          .dot-5 { animation: pulse-wave 1.5s infinite; animation-delay: 0.4s; }
+          
+          @keyframes pulse-wave {
+            0%, 60%, 100% { 
+              opacity: 0.25; 
+              transform: scale(1);
+            }
+            30% { 
+              opacity: 1; 
+              transform: scale(1.2);
+            }
+          }
+        `}
+      </style>
+      <circle className="dot-1" cx="1" cy="5" r="1" fill="currentColor" />
+      <circle className="dot-2" cx="1" cy="9" r="1" fill="currentColor" />
+      <circle className="dot-3" cx="5" cy="1" r="1" fill="currentColor" />
+      <circle className="dot-4" cx="5" cy="5" r="1" fill="currentColor" />
+      <circle className="dot-5" cx="5" cy="9" r="1" fill="currentColor" />
+    </svg>
+  );
+};
diff --git a/frontend/src/components/ui/status-badge.tsx b/frontend/src/components/ui/status-badge.tsx
new file mode 100644
index 00000000..f0f63241
--- /dev/null
+++ b/frontend/src/components/ui/status-badge.tsx
@@ -0,0 +1,58 @@
+import { AnimatedProcessingIcon } from "./animated-processing-icon";
+
+export type Status =
+  | "processing"
+  | "active"
+  | "unavailable"
+  | "hidden"
+  | "sync"
+  | "failed";
+
+interface StatusBadgeProps {
+  status: Status;
+  className?: string;
+}
+
+const statusConfig = {
+  processing: {
+    label: "Processing",
+    className: "text-muted-foreground dark:text-muted-foreground ",
+  },
+  active: {
+    label: "Active",
+    className: "text-emerald-600 dark:text-emerald-400 ",
+  },
+  unavailable: {
+    label: "Unavailable",
+    className: "text-red-600 dark:text-red-400 ",
+  },
+  failed: {
+    label: "Failed",
+    className: "text-red-600 dark:text-red-400 ",
+  },
+  hidden: {
+    label: "Hidden",
+    className: "text-zinc-400 dark:text-zinc-500 ",
+  },
+  sync: {
+    label: "Sync",
+    className: "text-amber-700 dark:text-amber-300 underline",
+  },
+};
+
+export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
+  const config = statusConfig[status];
+
+  return (
+    <div
+      className={`inline-flex items-center gap-1 ${config.className} ${
+        className || ""
+      }`}
+    >
+      {status === "processing" && (
+        <AnimatedProcessingIcon className="text-current mr-2" size={10} />
+      )}
+      {config.label}
+    </div>
+  );
+};
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx
index f15e9cc1..4b6c18c2 100644
--- a/frontend/src/contexts/task-context.tsx
+++ b/frontend/src/contexts/task-context.tsx
@@ -35,9 +35,22 @@ export interface Task {
   files?: Record<string, Record<string, unknown>>;
 }
 
+export interface TaskFile {
+  filename: string;
+  mimetype: string;
+  source_url: string;
+  size: number;
+  connector_type: string;
+  status: "active" | "failed" | "processing";
+  task_id: string;
+  created_at: string;
+  updated_at: string;
+}
 interface TaskContextType {
   tasks: Task[];
+  files: TaskFile[];
   addTask: (taskId: string) => void;
+  addFiles: (files: Partial<TaskFile>[], taskId: string) => void;
   removeTask: (taskId: string) => void;
   refreshTasks: () => Promise<void>;
   cancelTask: (taskId: string) => Promise<void>;
@@ -51,6 +64,7 @@ const TaskContext = createContext<TaskContextType | undefined>(undefined);
 
 export function TaskProvider({ children }: { children: React.ReactNode }) {
   const [tasks, setTasks] = useState<Task[]>([]);
+  const [files, setFiles] = useState<TaskFile[]>([]);
   const [isPolling, setIsPolling] = useState(false);
   const [isFetching, setIsFetching] = useState(false);
   const [isMenuOpen, setIsMenuOpen] = useState(false);
@@ -58,12 +72,32 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
 
   const queryClient = useQueryClient();
 
-  const refetchSearch = () => {
+  const refetchSearch = useCallback(() => {
     queryClient.invalidateQueries({
       queryKey: ["search"],
       exact: false,
     });
-  };
+  }, [queryClient]);
+
+  const addFiles = useCallback(
+    (newFiles: Partial<TaskFile>[], taskId: string) => {
+      const now = new Date().toISOString();
+      const filesToAdd: TaskFile[] = newFiles.map((file) => ({
+        filename: file.filename || "",
+        mimetype: file.mimetype || "",
+        source_url: file.source_url || "",
+        size: file.size || 0,
+        connector_type: file.connector_type || "local",
+        status: "processing",
+        task_id: taskId,
+        created_at: now,
+        updated_at: now,
+      }));
+
+      setFiles((prevFiles) => [...prevFiles, ...filesToAdd]);
+    },
+    [],
+  );
 
   const fetchTasks = useCallback(async () => {
     if (!isAuthenticated && !isNoAuthMode) return;
@@ -76,13 +110,87 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
         const newTasks = data.tasks || [];
 
         // Update tasks and check for status changes in the same state update
-        setTasks(prevTasks => {
+        setTasks((prevTasks) => {
           // Check for newly completed tasks to show toasts
           if (prevTasks.length > 0) {
             newTasks.forEach((newTask: Task) => {
               const oldTask = prevTasks.find(
-                t => t.task_id === newTask.task_id
+                (t) => t.task_id === newTask.task_id,
               );
+
+              // Update or add files from task.files if available
+              if (newTask.files && typeof newTask.files === "object") {
+                const taskFileEntries = Object.entries(newTask.files);
+                const now = new Date().toISOString();
+
+                taskFileEntries.forEach(([filePath, fileInfo]) => {
+                  if (typeof fileInfo === "object" && fileInfo) {
+                    const fileName = filePath.split("/").pop() || filePath;
+                    const fileStatus = fileInfo.status as string;
+
+                    // Map backend file status to our TaskFile status
+                    let mappedStatus: TaskFile["status"];
+                    switch (fileStatus) {
+                      case "pending":
+                      case "running":
+                        mappedStatus = "processing";
+                        break;
+                      case "completed":
+                        mappedStatus = "active";
+                        break;
+                      case "failed":
+                        mappedStatus = "failed";
+                        break;
+                      default:
+                        mappedStatus = "processing";
+                    }
+
+                    setFiles((prevFiles) => {
+                      const existingFileIndex = prevFiles.findIndex(
+                        (f) =>
+                          f.source_url === filePath &&
+                          f.task_id === newTask.task_id,
+                      );
+
+                      // Detect connector type based on file path or other indicators
+                      let connectorType = "local";
+                      if (filePath.includes("/") && !filePath.startsWith("/")) {
+                        // Likely S3 key format (bucket/path/file.ext)
+                        connectorType = "s3";
+                      }
+
+                      const fileEntry: TaskFile = {
+                        filename: fileName,
+                        mimetype: "", // We don't have this info from the task
+                        source_url: filePath,
+                        size: 0, // We don't have this info from the task
+                        connector_type: connectorType,
+                        status: mappedStatus,
+                        task_id: newTask.task_id,
+                        created_at:
+                          typeof fileInfo.created_at === "string"
+                            ? fileInfo.created_at
+                            : now,
+                        updated_at:
+                          typeof fileInfo.updated_at === "string"
+                            ? fileInfo.updated_at
+                            : now,
+                      };
+
+                      if (existingFileIndex >= 0) {
+                        // Update existing file
+                        const updatedFiles = [...prevFiles];
+                        updatedFiles[existingFileIndex] = fileEntry;
+                        return updatedFiles;
+                      } else {
+                        // Add new file
+                        return [...prevFiles, fileEntry];
+                      }
+                    });
+                  }
+                });
+              }
+
               if (
                 oldTask &&
                 oldTask.status !== "completed" &&
@@ -99,9 +207,14 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
                 refetchSearch();
                 // Dispatch knowledge updated event for all knowledge-related pages
                 console.log(
-                  "Task completed successfully, dispatching knowledgeUpdated event"
+                  "Task completed successfully, dispatching knowledgeUpdated event",
                 );
                 window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
+
+                // Remove files for this completed task from the files list
+                setFiles((prevFiles) =>
+                  prevFiles.filter((file) => file.task_id !== newTask.task_id),
+                );
               } else if (
                 oldTask &&
                 oldTask.status !== "failed" &&
@@ -114,6 +227,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
                     newTask.error || "Unknown error"
                   }`,
                 });
+
+                // Files will be updated to failed status by the file parsing logic above
               }
             });
           }
@@ -126,7 +241,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
     } finally {
       setIsFetching(false);
     }
-  }, [isAuthenticated, isNoAuthMode]); // Removed 'tasks' from dependencies to prevent infinite loop!
+  }, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop!
 
   const addTask = useCallback((taskId: string) => {
     // Immediately start aggressive polling for the new task
@@ -140,19 +255,21 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
           const data = await response.json();
           const newTasks = data.tasks || [];
           const foundTask = newTasks.find(
-            (task: Task) => task.task_id === taskId
+            (task: Task) => task.task_id === taskId,
           );
 
           if (foundTask) {
             // Task found! Update the tasks state
-            setTasks(prevTasks => {
+            setTasks((prevTasks) => {
               // Check if task is already in the list
-              const exists = prevTasks.some(t => t.task_id === taskId);
+              const exists = prevTasks.some((t) => t.task_id === taskId);
               if (!exists) {
                 return [...prevTasks, foundTask];
               }
               // Update existing task
-              return prevTasks.map(t => (t.task_id === taskId ? foundTask : t));
+              return prevTasks.map((t) =>
+                t.task_id === taskId ? foundTask : t,
+              );
             });
             return; // Stop polling, we found it
           }
@@ -177,7 +294,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
   }, [fetchTasks]);
 
   const removeTask = useCallback((taskId: string) => {
-    setTasks(prev => prev.filter(task => task.task_id !== taskId));
+    setTasks((prev) => prev.filter((task) => task.task_id !== taskId));
   }, []);
 
   const cancelTask = useCallback(
@@ -204,11 +321,11 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
         });
       }
     },
-    [fetchTasks]
+    [fetchTasks],
   );
 
   const toggleMenu = useCallback(() => {
-    setIsMenuOpen(prev => !prev);
+    setIsMenuOpen((prev) => !prev);
   }, []);
 
   // Periodic polling for task updates
@@ -231,7 +348,9 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
 
   const value: TaskContextType = {
     tasks,
+    files,
     addTask,
+    addFiles,
     removeTask,
     refreshTasks,
     cancelTask,
diff --git a/src/api/models.py b/src/api/models.py
index d79d1b23..fa7269f3 100644
--- a/src/api/models.py
+++ b/src/api/models.py
@@ -17,14 +17,18 @@ async def get_openai_models(request, models_service, session_manager):
             try:
                 config = get_openrag_config()
                 api_key = config.provider.api_key
-                logger.info(f"Retrieved API key from config: {'yes' if api_key else 'no'}")
+                logger.info(
+                    f"Retrieved API key from config: {'yes' if api_key else 'no'}"
+                )
             except Exception as e:
                 logger.error(f"Failed to get config: {e}")
 
         if not api_key:
             return JSONResponse(
-                {"error": "OpenAI API key is required either as query parameter or in configuration"},
-                status_code=400
+                {
+                    "error": "OpenAI API key is required either as query parameter or in configuration"
+                },
+                status_code=400,
             )
 
         models = await models_service.get_openai_models(api_key=api_key)
@@ -32,8 +36,7 @@ async def get_openai_models(request, models_service, session_manager):
     except Exception as e:
         logger.error(f"Failed to get OpenAI models: {str(e)}")
         return JSONResponse(
-            {"error": f"Failed to retrieve OpenAI models: {str(e)}"},
-            status_code=500
+            {"error": f"Failed to retrieve OpenAI models: {str(e)}"}, status_code=500
         )
 
 
@@ -44,13 +47,31 @@ async def get_ollama_models(request, models_service, session_manager):
         query_params = dict(request.query_params)
         endpoint = query_params.get("endpoint")
 
+        # If no endpoint provided, try to get it from stored configuration
+        if not endpoint:
+            try:
+                config = get_openrag_config()
+                endpoint = config.provider.endpoint
+                logger.info(
+                    f"Retrieved endpoint from config: {'yes' if endpoint else 'no'}"
+                )
+            except Exception as e:
+                logger.error(f"Failed to get config: {e}")
+
+        if not endpoint:
+            return JSONResponse(
+                {
+                    "error": "Endpoint is required either as query parameter or in configuration"
+                },
+                status_code=400,
+            )
+
         models = await models_service.get_ollama_models(endpoint=endpoint)
         return JSONResponse(models)
     except Exception as e:
         logger.error(f"Failed to get Ollama models: {str(e)}")
         return JSONResponse(
-            {"error": f"Failed to retrieve Ollama models: {str(e)}"},
-            status_code=500
+            {"error": f"Failed to retrieve Ollama models: {str(e)}"}, status_code=500
         )
 
 
@@ -63,15 +84,65 @@ async def get_ibm_models(request, models_service, session_manager):
         api_key = query_params.get("api_key")
         project_id = query_params.get("project_id")
 
+        config = get_openrag_config()
+        # If no API key provided, try to get it from stored configuration
+        if not api_key:
+            try:
+                api_key = config.provider.api_key
+                logger.info(
+                    f"Retrieved API key from config: {'yes' if api_key else 'no'}"
+                )
+            except Exception as e:
+                logger.error(f"Failed to get config: {e}")
+
+        if not api_key:
+            return JSONResponse(
+                {
+                    "error": "OpenAI API key is required either as query parameter or in configuration"
+                },
+                status_code=400,
+            )
+
+        if not endpoint:
+            try:
+                endpoint = config.provider.endpoint
+                logger.info(
+                    f"Retrieved endpoint from config: {'yes' if endpoint else 'no'}"
+                )
+            except Exception as e:
+                logger.error(f"Failed to get config: {e}")
+
+        if not endpoint:
+            return JSONResponse(
+                {
+                    "error": "Endpoint is required either as query parameter or in configuration"
+                },
+                status_code=400,
+            )
+
+        if not project_id:
+            try:
+                project_id = config.provider.project_id
+                logger.info(
+                    f"Retrieved project ID from config: {'yes' if project_id else 'no'}"
+                )
+            except Exception as e:
+                logger.error(f"Failed to get config: {e}")
+
+        if not project_id:
+            return JSONResponse(
+                {
+                    "error": "Project ID is required either as query parameter or in configuration"
+                },
+                status_code=400,
+            )
+
         models = await models_service.get_ibm_models(
-            endpoint=endpoint,
-            api_key=api_key,
-            project_id=project_id
+            endpoint=endpoint, api_key=api_key, project_id=project_id
         )
         return JSONResponse(models)
     except Exception as e:
         logger.error(f"Failed to get IBM models: {str(e)}")
         return JSONResponse(
-            {"error": f"Failed to retrieve IBM models: {str(e)}"},
-            status_code=500
-        )
\ No newline at end of file
+            {"error": f"Failed to retrieve IBM models: {str(e)}"}, status_code=500
+        )
diff --git a/src/services/task_service.py b/src/services/task_service.py
index c9328b90..de297dff 100644
--- a/src/services/task_service.py
+++ b/src/services/task_service.py
@@ -17,7 +17,9 @@ class TaskService:
     def __init__(self, document_service=None, process_pool=None):
         self.document_service = document_service
         self.process_pool = process_pool
-        self.task_store: dict[str, dict[str, UploadTask]] = {}  # user_id -> {task_id -> UploadTask}
+        self.task_store: dict[
+            str, dict[str, UploadTask]
+        ] = {}  # user_id -> {task_id -> UploadTask}
         self.background_tasks = set()
 
         if self.process_pool is None:
@@ -122,18 +124,27 @@ class TaskService:
 
             # Process files with limited concurrency to avoid overwhelming the system
             max_workers = get_worker_count()
-            semaphore = asyncio.Semaphore(max_workers * 2)  # Allow 2x process pool size for async I/O
+            semaphore = asyncio.Semaphore(
+                max_workers * 2
+            )  # Allow 2x process pool size for async I/O
 
             async def process_with_semaphore(file_path: str):
                 async with semaphore:
-                    await self.document_service.process_single_file_task(upload_task, file_path)
+                    await self.document_service.process_single_file_task(
+                        upload_task, file_path
+                    )
 
-            tasks = [process_with_semaphore(file_path) for file_path in upload_task.file_tasks.keys()]
+            tasks = [
+                process_with_semaphore(file_path)
+                for file_path in upload_task.file_tasks.keys()
+            ]
 
             await asyncio.gather(*tasks, return_exceptions=True)
 
         except Exception as e:
-            logger.error("Background upload processor failed", task_id=task_id, error=str(e))
+            logger.error(
+                "Background upload processor failed", task_id=task_id, error=str(e)
+            )
             import traceback
 
             traceback.print_exc()
@@ -141,7 +152,9 @@ class TaskService:
                 self.task_store[user_id][task_id].status = TaskStatus.FAILED
                 self.task_store[user_id][task_id].updated_at = time.time()
 
-    async def background_custom_processor(self, user_id: str, task_id: str, items: list) -> None:
+    async def background_custom_processor(
+        self, user_id: str, task_id: str, items: list
+    ) -> None:
         """Background task to process items using custom processor"""
         try:
             upload_task = self.task_store[user_id][task_id]
@@ -163,7 +176,9 @@ class TaskService:
                     try:
                         await processor.process_item(upload_task, item, file_task)
                     except Exception as e:
-                        logger.error("Failed to process item", item=str(item), error=str(e))
+                        logger.error(
+                            "Failed to process item", item=str(item), error=str(e)
+                        )
                         import traceback
 
                         traceback.print_exc()
@@ -190,7 +205,9 @@ class TaskService:
                 pass
             raise  # Re-raise to properly handle cancellation
         except Exception as e:
-            logger.error("Background custom processor failed", task_id=task_id, error=str(e))
+            logger.error(
+                "Background custom processor failed", task_id=task_id, error=str(e)
+            )
             import traceback
 
             traceback.print_exc()
@@ -212,7 +229,10 @@ class TaskService:
 
         upload_task = None
         for candidate_user_id in candidate_user_ids:
-            if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]:
+            if (
+                candidate_user_id in self.task_store
+                and task_id in self.task_store[candidate_user_id]
+            ):
                 upload_task = self.task_store[candidate_user_id][task_id]
                 break
 
@@ -271,10 +291,23 @@ class TaskService:
                 if task_id in tasks_by_id:
                     continue
 
-                # Calculate running and pending counts
+                # Calculate running and pending counts and collect statuses of non-completed files
                 running_files_count = 0
                 pending_files_count = 0
-                for file_task in upload_task.file_tasks.values():
+                file_statuses = {}
+
+                for file_path, file_task in upload_task.file_tasks.items():
+                    if file_task.status.value != "completed":
+                        file_statuses[file_path] = {
+                            "status": file_task.status.value,
+                            "result": file_task.result,
+                            "error": file_task.error,
+                            "retry_count": file_task.retry_count,
+                            "created_at": file_task.created_at,
+                            "updated_at": file_task.updated_at,
+                            "duration_seconds": file_task.duration_seconds,
+                        }
+
                     if file_task.status.value == "running":
                         running_files_count += 1
                     elif file_task.status.value == "pending":
@@ -292,6 +325,7 @@ class TaskService:
                     "created_at": upload_task.created_at,
                     "updated_at": upload_task.updated_at,
                     "duration_seconds": upload_task.duration_seconds,
+                    "files": file_statuses,
                 }
 
         # First, add user-owned tasks; then shared anonymous;
@@ -312,7 +346,10 @@ class TaskService:
 
         store_user_id = None
         for candidate_user_id in candidate_user_ids:
-            if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]:
+            if (
+                candidate_user_id in self.task_store
+                and task_id in self.task_store[candidate_user_id]
+            ):
                 store_user_id = candidate_user_id
                 break
 
@@ -326,7 +363,10 @@ class TaskService:
             return False
 
         # Cancel the background task to stop scheduling new work
-        if hasattr(upload_task, "background_task") and not upload_task.background_task.done():
+        if (
+            hasattr(upload_task, "background_task")
+            and not upload_task.background_task.done()
+        ):
             upload_task.background_task.cancel()
 
         # Mark task as failed (cancelled)