From 99bf3772b50131ddbdee9e8c09435bb900f0ff3b Mon Sep 17 00:00:00 2001 From: Brent O'Neill Date: Fri, 19 Sep 2025 12:16:20 -0600 Subject: [PATCH 01/19] updated header for chunks page --- frontend/src/app/knowledge/chunks/page.tsx | 67 ++++++++++++++++------ 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index 9385c474..254eb511 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -1,8 +1,10 @@ "use client"; import { + ArrowLeft, Building2, Cloud, + File as FileIcon, FileText, HardDrive, Loader2, @@ -21,6 +23,9 @@ import { type File, useGetSearchQuery, } from "../../api/queries/useGetSearchQuery"; +import { Label } from "@/components/ui/label"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Input } from "@/components/ui/input"; // Function to get the appropriate icon for a connector type function getSourceIcon(connectorType?: string) { @@ -47,9 +52,14 @@ function ChunksPageContent() { const filename = searchParams.get("filename"); const [chunks, setChunks] = useState([]); + const [selectAll, setSelectAll] = useState(false); + const [queryInputText, setQueryInputText] = useState( + parsedFilterData?.query ?? "" + ); + // Use the same search query as the knowledge page, but we'll filter for the specific file const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); - + console.log({ data }); // Extract chunks for the specific file useEffect(() => { if (!filename || !(data as File[]).length) { @@ -98,30 +108,53 @@ function ChunksPageContent() { >
{/* Header */} -
-
- -
-

Document Chunks

-

- {decodeURIComponent(filename)} -

+
+
+
+ setSelectAll(checked === true)} + /> + +
+
+ setQueryInputText(e.target.value)} + placeholder="Search chunks..." + className="flex-1 bg-muted/20 rounded-lg border border-border/50 px-4 py-3 focus-visible:ring-1 focus-visible:ring-ring" + /> +
-
+ {/*
{!isFetching && chunks.length > 0 && ( {chunks.length} chunk{chunks.length !== 1 ? "s" : ""} found )} -
+
*/}
{/* Content Area - matches knowledge page structure */} From 9fdec36e9c5c6a7369738c53904a49ed358b6ed5 Mon Sep 17 00:00:00 2001 From: Brent O'Neill Date: Mon, 22 Sep 2025 13:06:19 -0600 Subject: [PATCH 02/19] make the search filter work --- frontend/src/app/knowledge/chunks/page.tsx | 107 +++++++++++---------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index 254eb511..450032a8 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -2,18 +2,13 @@ import { ArrowLeft, - Building2, - Cloud, + Copy, File as FileIcon, - FileText, - HardDrive, Loader2, Search, } from "lucide-react"; import { Suspense, useCallback, useEffect, useState } from "react"; import { useRouter, useSearchParams } from "next/navigation"; -import { SiGoogledrive } from "react-icons/si"; -import { TbBrandOnedrive } from "react-icons/tb"; import { ProtectedRoute } from "@/components/protected-route"; import { Button } from "@/components/ui/button"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; @@ -27,22 +22,6 @@ import { Label } from "@/components/ui/label"; import { Checkbox } from "@/components/ui/checkbox"; import { Input } from "@/components/ui/input"; -// Function to get the appropriate icon for a connector type -function getSourceIcon(connectorType?: string) { - switch (connectorType) { - case "google_drive": - return ; - case "onedrive": - return ; - case "sharepoint": - return ; - case "s3": - return ; - default: - return ; - } -} - function ChunksPageContent() { const router = useRouter(); const searchParams = useSearchParams(); @@ -51,12 +30,32 @@ function ChunksPageContent() { const filename = searchParams.get("filename"); const [chunks, setChunks] = useState([]); + const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< + ChunkResult[] + >([]); const [selectAll, setSelectAll] = useState(false); 
const [queryInputText, setQueryInputText] = useState( parsedFilterData?.query ?? "" ); + useEffect(() => { + if (queryInputText === "") { + setChunksFilteredByQuery(chunks); + } else { + setChunksFilteredByQuery((prevChunks) => + prevChunks.filter((chunk) => + chunk.text.toLowerCase().includes(queryInputText.toLowerCase()) + ) + ); + } + }, [queryInputText, chunks]); + + const handleCopy = useCallback((text: string) => { + console.log("copying text to clipboard:", text); + navigator.clipboard.writeText(text); + }, []); + // Use the same search query as the knowledge page, but we'll filter for the specific file const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); console.log({ data }); @@ -118,7 +117,7 @@ function ChunksPageContent() {
-
+
- {/*
- {!isFetching && chunks.length > 0 && ( - - {chunks.length} chunk{chunks.length !== 1 ? "s" : ""} found - - )} -
*/}
{/* Content Area - matches knowledge page structure */} @@ -180,35 +172,48 @@ function ChunksPageContent() {
) : (
- {chunks.map((chunk, index) => ( + {chunksFilteredByQuery.map((chunk, index) => (
-
- - - {chunk.filename} +
+
+ +
+ + Chunk {chunk.page} - {chunk.connector_type && ( -
- {getSourceIcon(chunk.connector_type)} -
- )} + + {chunk.text.length} chars + +
+ +
- - {chunk.score.toFixed(2)} - + + {/* TODO: Update to use active toggle */} + {/* + + Active + */}
-
- {chunk.mimetype} - Page {chunk.page} - {chunk.owner_name && Owner: {chunk.owner_name}} +
+
+ {chunk.text} +
-

- {chunk.text} -

))}
From fc5f67e244f02f18523e95824bb7be13d006cdd9 Mon Sep 17 00:00:00 2001 From: Brent O'Neill Date: Mon, 22 Sep 2025 14:23:03 -0600 Subject: [PATCH 03/19] Added technical details section --- frontend/src/app/knowledge/chunks/page.tsx | 112 +++++++++++++++++++-- 1 file changed, 101 insertions(+), 11 deletions(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index 450032a8..0a5a00a3 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -7,7 +7,7 @@ import { Loader2, Search, } from "lucide-react"; -import { Suspense, useCallback, useEffect, useState } from "react"; +import { Suspense, useCallback, useEffect, useMemo, useState } from "react"; import { useRouter, useSearchParams } from "next/navigation"; import { ProtectedRoute } from "@/components/protected-route"; import { Button } from "@/components/ui/button"; @@ -22,6 +22,12 @@ import { Label } from "@/components/ui/label"; import { Checkbox } from "@/components/ui/checkbox"; import { Input } from "@/components/ui/input"; +const getFileTypeLabel = (mimetype: string) => { + if (mimetype === "application/pdf") return "PDF"; + if (mimetype === "text/plain") return "Text"; + if (mimetype === "application/msword") return "Word Document"; +}; + function ChunksPageContent() { const router = useRouter(); const searchParams = useSearchParams(); @@ -33,12 +39,21 @@ function ChunksPageContent() { const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< ChunkResult[] >([]); + const averageChunkLength = useMemo( + () => + chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / + chunks.length || 0, + [chunks] + ); const [selectAll, setSelectAll] = useState(false); const [queryInputText, setQueryInputText] = useState( parsedFilterData?.query ?? 
"" ); + // Use the same search query as the knowledge page, but we'll filter for the specific file + const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); + useEffect(() => { if (queryInputText === "") { setChunksFilteredByQuery(chunks); @@ -52,13 +67,14 @@ function ChunksPageContent() { }, [queryInputText, chunks]); const handleCopy = useCallback((text: string) => { - console.log("copying text to clipboard:", text); navigator.clipboard.writeText(text); }, []); - // Use the same search query as the knowledge page, but we'll filter for the specific file - const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); - console.log({ data }); + const fileData = (data as File[]).find( + (file: File) => file.filename === filename + ); + + console.log({ fileData }); // Extract chunks for the specific file useEffect(() => { if (!filename || !(data as File[]).length) { @@ -66,11 +82,8 @@ function ChunksPageContent() { return; } - const fileData = (data as File[]).find( - (file: File) => file.filename === filename - ); setChunks(fileData?.chunks || []); - }, [data, filename]); + }, [data, filename, fileData?.chunks]); const handleBack = useCallback(() => { router.back(); @@ -90,9 +103,11 @@ function ChunksPageContent() { ); } + console.log({ data }); + return (
*/}
-
+
{chunk.text}
@@ -220,6 +235,81 @@ function ChunksPageContent() { )}
+ {/* Right panel - Summary (TODO), Technical details, */} +
+
+

Technical details

+
+
+
Total chunks
+
+ {chunks.length} +
+
+
+
Avg length
+
+ {averageChunkLength.toFixed(0)} chars +
+
+
+
Process time
+
+ {/* {averageChunkLength.toFixed(0)} chars */} +
+
+
+
Model
+
+ {/* {averageChunkLength.toFixed(0)} chars */} +
+
+
+
+
+

Original document

+
+
+
Name
+
+ {fileData?.filename} +
+
+
+
Type
+
+ {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"} +
+
+
+
Size
+
+ {fileData?.size + ? `${Math.round(fileData.size / 1024)} KB` + : "Unknown"} +
+
+
+
Uploaded
+
+ {fileData?.uploaded || "Unknown"} +
+
+
+
Source
+
+ {/* {fileData?.uploaded || "Unknown"} */} +
+
+
+
Updated
+
+ N/A +
+
+
+
+
); } From 13f75411900eedb0dee46a960f72c6271ec0f102 Mon Sep 17 00:00:00 2001 From: Brent O'Neill Date: Mon, 22 Sep 2025 14:23:50 -0600 Subject: [PATCH 04/19] finish stubbing data --- frontend/src/app/knowledge/chunks/page.tsx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index 0a5a00a3..e9b66187 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -292,14 +292,12 @@ function ChunksPageContent() {
Uploaded
- {fileData?.uploaded || "Unknown"} + N/A
Source
-
- {/* {fileData?.uploaded || "Unknown"} */} -
+
Updated
From 8a17cccf3d7fc87e706bfb26ebaa9c7aecda3bd4 Mon Sep 17 00:00:00 2001 From: Brent O'Neill Date: Mon, 22 Sep 2025 14:29:13 -0600 Subject: [PATCH 05/19] remove console.logs --- frontend/src/app/knowledge/chunks/page.tsx | 3 --- 1 file changed, 3 deletions(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index e9b66187..b59a8760 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -74,7 +74,6 @@ function ChunksPageContent() { (file: File) => file.filename === filename ); - console.log({ fileData }); // Extract chunks for the specific file useEffect(() => { if (!filename || !(data as File[]).length) { @@ -103,8 +102,6 @@ function ChunksPageContent() { ); } - console.log({ data }); - return (
Date: Mon, 22 Sep 2025 16:58:12 -0500 Subject: [PATCH 06/19] update flow based on docling presets --- src/api/settings.py | 99 ++++++++++++++++++++++++++++++++++++++++++ src/config/settings.py | 3 ++ src/main.py | 13 +++++- 3 files changed, 114 insertions(+), 1 deletion(-) diff --git a/src/api/settings.py b/src/api/settings.py index c169b263..9723cdeb 100644 --- a/src/api/settings.py +++ b/src/api/settings.py @@ -7,6 +7,7 @@ from config.settings import ( LANGFLOW_CHAT_FLOW_ID, LANGFLOW_INGEST_FLOW_ID, LANGFLOW_PUBLIC_URL, + DOCLING_COMPONENT_ID, clients, get_openrag_config, config_manager, @@ -234,6 +235,15 @@ async def update_settings(request, session_manager): current_config.knowledge.doclingPresets = body["doclingPresets"] config_updated = True + # Also update the flow with the new docling preset + try: + await _update_flow_docling_preset(body["doclingPresets"], preset_configs[body["doclingPresets"]]) + logger.info(f"Successfully updated docling preset in flow to '{body['doclingPresets']}'") + except Exception as e: + logger.error(f"Failed to update docling preset in flow: {str(e)}") + # Don't fail the entire settings update if flow update fails + # The config will still be saved + if "chunk_size" in body: if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0: return JSONResponse( @@ -527,3 +537,92 @@ async def onboarding(request, flows_service): {"error": f"Failed to update onboarding settings: {str(e)}"}, status_code=500, ) + + +async def _update_flow_docling_preset(preset: str, preset_config: dict): + """Helper function to update docling preset in the ingest flow""" + if not LANGFLOW_INGEST_FLOW_ID: + raise ValueError("LANGFLOW_INGEST_FLOW_ID is not configured") + + # Get the current flow data from Langflow + response = await clients.langflow_request( + "GET", f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}" + ) + + if response.status_code != 200: + raise Exception(f"Failed to get ingest flow: HTTP {response.status_code} - {response.text}") + + 
flow_data = response.json() + + # Find the target node in the flow using environment variable + nodes = flow_data.get("data", {}).get("nodes", []) + target_node = None + target_node_index = None + + for i, node in enumerate(nodes): + if node.get("id") == DOCLING_COMPONENT_ID: + target_node = node + target_node_index = i + break + + if target_node is None: + raise Exception(f"Docling component '{DOCLING_COMPONENT_ID}' not found in ingest flow") + + # Update the docling_serve_opts value directly in the existing node + if (target_node.get("data", {}).get("node", {}).get("template", {}).get("docling_serve_opts")): + flow_data["data"]["nodes"][target_node_index]["data"]["node"]["template"]["docling_serve_opts"]["value"] = preset_config + else: + raise Exception(f"docling_serve_opts field not found in node '{DOCLING_COMPONENT_ID}'") + + # Update the flow via PATCH request + patch_response = await clients.langflow_request( + "PATCH", f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}", json=flow_data + ) + + if patch_response.status_code != 200: + raise Exception(f"Failed to update ingest flow: HTTP {patch_response.status_code} - {patch_response.text}") + + +async def update_docling_preset(request, session_manager): + """Update docling preset in the ingest flow""" + try: + # Parse request body + body = await request.json() + + # Validate preset parameter + if "preset" not in body: + return JSONResponse( + {"error": "preset parameter is required"}, + status_code=400 + ) + + preset = body["preset"] + preset_configs = get_docling_preset_configs() + + if preset not in preset_configs: + valid_presets = list(preset_configs.keys()) + return JSONResponse( + {"error": f"Invalid preset '{preset}'. 
Valid presets: {', '.join(valid_presets)}"}, + status_code=400 + ) + + # Get the preset configuration + preset_config = preset_configs[preset] + + # Use the helper function to update the flow + await _update_flow_docling_preset(preset, preset_config) + + logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow") + + return JSONResponse({ + "message": f"Successfully updated docling preset to '{preset}'", + "preset": preset, + "preset_config": preset_config + }) + + except Exception as e: + logger.error("Failed to update docling preset", error=str(e)) + return JSONResponse( + {"error": f"Failed to update docling preset: {str(e)}"}, + status_code=500 + ) diff --git a/src/config/settings.py b/src/config/settings.py index 66f78ce5..11e4b835 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -528,6 +528,9 @@ OLLAMA_EMBEDDING_COMPONENT_ID = os.getenv( ) OLLAMA_LLM_COMPONENT_ID = os.getenv("OLLAMA_LLM_COMPONENT_ID", "OllamaModel-eCsJx") +# Docling component ID for ingest flow +DOCLING_COMPONENT_ID = os.getenv("DOCLING_COMPONENT_ID", "DoclingRemote-78KoX") + # Global clients instance clients = AppClients() diff --git a/src/main.py b/src/main.py index e7cca718..f78e07bc 100644 --- a/src/main.py +++ b/src/main.py @@ -971,12 +971,23 @@ async def create_app(): "/onboarding", require_auth(services["session_manager"])( partial( - settings.onboarding, + settings.onboarding, flows_service=services["flows_service"] ) ), methods=["POST"], ), + # Docling preset update endpoint + Route( + "/settings/docling-preset", + require_auth(services["session_manager"])( + partial( + settings.update_docling_preset, + session_manager=services["session_manager"] + ) + ), + methods=["PATCH"], + ), Route( "/nudges", require_auth(services["session_manager"])( From f861f952201a8d09b75dfd4af5d3a7978aa229a1 Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Mon, 22 Sep 2025 17:05:46 -0500 Subject: [PATCH 07/19] add docling ingest flow --- .env.example | 2 + 
flows/openrag_ingest_docling.json | 2220 +++++++++++++++++++++++++++++ 2 files changed, 2222 insertions(+) create mode 100644 flows/openrag_ingest_docling.json diff --git a/.env.example b/.env.example index 45b7676b..fe908795 100644 --- a/.env.example +++ b/.env.example @@ -8,6 +8,8 @@ LANGFLOW_SECRET_KEY= # flow ids for chat and ingestion flows LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0 LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813 +# Ingest flow using docling +LANGFLOW_INGEST_FLOW_ID=1402618b-e6d1-4ff2-9a11-d6ce71186915 NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c # Set a strong admin password for OpenSearch; a bcrypt hash is generated at diff --git a/flows/openrag_ingest_docling.json b/flows/openrag_ingest_docling.json new file mode 100644 index 00000000..cd6d7d39 --- /dev/null +++ b/flows/openrag_ingest_docling.json @@ -0,0 +1,2220 @@ +{ + "data": { + "edges": [ + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "SplitText", + "id": "SplitText-3ZI5B", + "name": "dataframe", + "output_types": [ + "DataFrame" + ] + }, + "targetHandle": { + "fieldName": "ingest_data", + "id": "OpenSearchHybrid-XtKoA", + "inputTypes": [ + "Data", + "DataFrame" + ], + "type": "other" + } + }, + "id": "reactflow__edge-SplitText-3ZI5B{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-3ZI5Bœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-OpenSearchHybrid-XtKoA{œfieldNameœ:œingest_dataœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", + "selected": false, + "source": "SplitText-3ZI5B", + "sourceHandle": "{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-3ZI5Bœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "OpenSearchHybrid-XtKoA", + "targetHandle": "{œfieldNameœ:œingest_dataœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "OpenAIEmbeddings", + "id": 
"OpenAIEmbeddings-mP45L", + "name": "embeddings", + "output_types": [ + "Embeddings" + ] + }, + "targetHandle": { + "fieldName": "embedding", + "id": "OpenSearchHybrid-XtKoA", + "inputTypes": [ + "Embeddings" + ], + "type": "other" + } + }, + "id": "reactflow__edge-OpenAIEmbeddings-mP45L{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-mP45Lœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-OpenSearchHybrid-XtKoA{œfieldNameœ:œembeddingœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "selected": false, + "source": "OpenAIEmbeddings-mP45L", + "sourceHandle": "{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-mP45Lœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}", + "target": "OpenSearchHybrid-XtKoA", + "targetHandle": "{œfieldNameœ:œembeddingœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "DoclingRemote", + "id": "DoclingRemote-78KoX", + "name": "dataframe", + "output_types": [ + "DataFrame" + ] + }, + "targetHandle": { + "fieldName": "data_inputs", + "id": "ExportDoclingDocument-xFoCI", + "inputTypes": [ + "Data", + "DataFrame" + ], + "type": "other" + } + }, + "id": "xy-edge__DoclingRemote-78KoX{œdataTypeœ:œDoclingRemoteœ,œidœ:œDoclingRemote-78KoXœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-ExportDoclingDocument-xFoCI{œfieldNameœ:œdata_inputsœ,œidœ:œExportDoclingDocument-xFoCIœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", + "selected": false, + "source": "DoclingRemote-78KoX", + "sourceHandle": "{œdataTypeœ:œDoclingRemoteœ,œidœ:œDoclingRemote-78KoXœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "ExportDoclingDocument-xFoCI", + "targetHandle": "{œfieldNameœ:œdata_inputsœ,œidœ:œExportDoclingDocument-xFoCIœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": 
"ExportDoclingDocument", + "id": "ExportDoclingDocument-xFoCI", + "name": "data", + "output_types": [ + "Data" + ] + }, + "targetHandle": { + "fieldName": "data_inputs", + "id": "SplitText-3ZI5B", + "inputTypes": [ + "Data", + "DataFrame", + "Message" + ], + "type": "other" + } + }, + "id": "xy-edge__ExportDoclingDocument-xFoCI{œdataTypeœ:œExportDoclingDocumentœ,œidœ:œExportDoclingDocument-xFoCIœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-3ZI5B{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-3ZI5Bœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", + "selected": false, + "source": "ExportDoclingDocument-xFoCI", + "sourceHandle": "{œdataTypeœ:œExportDoclingDocumentœ,œidœ:œExportDoclingDocument-xFoCIœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}", + "target": "SplitText-3ZI5B", + "targetHandle": "{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-3ZI5Bœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" + } + ], + "nodes": [ + { + "data": { + "description": "Split text into chunks based on specified criteria.", + "display_name": "Split Text", + "id": "SplitText-3ZI5B", + "node": { + "base_classes": [ + "DataFrame" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Split text into chunks based on specified criteria.", + "display_name": "Split Text", + "documentation": "https://docs.langflow.org/components-processing#split-text", + "edited": true, + "field_order": [ + "data_inputs", + "chunk_overlap", + "chunk_size", + "separator", + "text_key", + "keep_separator" + ], + "frozen": false, + "icon": "scissors-line-dashed", + "legacy": false, + "lf_version": "1.6.0", + "metadata": { + "code_hash": "65a90e1f4fe6", + "dependencies": { + "dependencies": [ + { + "name": "langchain_text_splitters", + "version": "0.3.9" + }, + { + "name": "langflow", + "version": "1.5.0.post2" + } + ], + "total_dependencies": 2 + }, + "module": "custom_components.split_text" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + 
"allows_loop": false, + "cache": true, + "display_name": "Chunks", + "group_outputs": false, + "hidden": null, + "method": "split_text", + "name": "dataframe", + "options": null, + "required_inputs": null, + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "chunk_overlap": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Chunk Overlap", + "dynamic": false, + "info": "Number of characters to overlap between chunks.", + "list": false, + "list_add_label": "Add More", + "name": "chunk_overlap", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 200 + }, + "chunk_size": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Chunk Size", + "dynamic": false, + "info": "The maximum length of each chunk. Text is first split by separator, then chunks are merged up to this size. 
Individual splits larger than this won't be further divided.", + "list": false, + "list_add_label": "Add More", + "name": "chunk_size", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 1000 + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n documentation: str = \"https://docs.langflow.org/components-processing#split-text\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Input\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. 
\"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n data_list = [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n return data_list\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n elif isinstance(self.data_inputs, Message):\n self.data_inputs = [self.data_inputs.to_data()]\n return self.split_text_base()\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n 
self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n self.log(documents)\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> DataFrame:\n return DataFrame(self._docs_to_data(self.split_text_base()))\n" + }, + "data_inputs": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Input", + "dynamic": false, + "info": "The data with texts to split in chunks.", + "input_types": [ + "Data", + "DataFrame", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "name": "data_inputs", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "keep_separator": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Keep Separator", + "dynamic": false, + "info": "Whether to keep the separator in the output chunks and where to place it.", + "name": "keep_separator", + "options": [ + "False", + "True", + "Start", + "End" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + 
"show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "False" + }, + "separator": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Separator", + "dynamic": false, + "info": "The character to split on. Use \\n for newline. Examples: \\n\\n for paragraphs, \\n for lines, . for sentences", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "\n" + }, + "text_key": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Text Key", + "dynamic": false, + "info": "The key to use for the text column.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "text_key", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "text" + } + }, + "tool_mode": false + }, + "selected_output": "chunks", + "type": "SplitText" + }, + "dragging": false, + "height": 475, + "id": "SplitText-3ZI5B", + "measured": { + "height": 475, + "width": 320 + }, + "position": { + "x": 1729.1788373023007, + "y": 1330.8003441546418 + }, + "positionAbsolute": { + "x": 1683.4543896546102, + "y": 1350.7871623588553 + }, + "selected": false, + "type": "genericNode", + "width": 320 + }, + { + "data": { + "id": "OpenAIEmbeddings-mP45L", + "node": { + "base_classes": [ + "Embeddings" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Generate embeddings using OpenAI models.", + "display_name": "OpenAI Embeddings", + "documentation": "", + "edited": false, + "field_order": [ + 
"default_headers", + "default_query", + "chunk_size", + "client", + "deployment", + "embedding_ctx_length", + "max_retries", + "model", + "model_kwargs", + "openai_api_key", + "openai_api_base", + "openai_api_type", + "openai_api_version", + "openai_organization", + "openai_proxy", + "request_timeout", + "show_progress_bar", + "skip_empty", + "tiktoken_model_name", + "tiktoken_enable", + "dimensions" + ], + "frozen": false, + "icon": "OpenAI", + "legacy": false, + "metadata": { + "code_hash": "8a658ed6d4c9", + "dependencies": { + "dependencies": [ + { + "name": "langchain_openai", + "version": "0.3.23" + }, + { + "name": "lfx", + "version": null + } + ], + "total_dependencies": 2 + }, + "module": "custom_components.openai_embeddings" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Embedding Model", + "group_outputs": false, + "method": "build_embeddings", + "name": "embeddings", + "options": null, + "required_inputs": null, + "selected": "Embeddings", + "tool_mode": true, + "types": [ + "Embeddings" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "chunk_size": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Chunk Size", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "chunk_size", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 1000 + }, + "client": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Client", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "client", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + 
"type": "str", + "value": "" + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langchain_openai import OpenAIEmbeddings\n\nfrom lfx.base.embeddings.model import LCEmbeddingsModel\nfrom lfx.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES\nfrom lfx.field_typing import Embeddings\nfrom lfx.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput\n\n\nclass OpenAIEmbeddingsComponent(LCEmbeddingsModel):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n icon = \"OpenAI\"\n name = \"OpenAIEmbeddings\"\n\n inputs = [\n DictInput(\n name=\"default_headers\",\n display_name=\"Default Headers\",\n advanced=True,\n info=\"Default headers to use for the API request.\",\n ),\n DictInput(\n name=\"default_query\",\n display_name=\"Default Query\",\n advanced=True,\n info=\"Default query parameters to use for the API request.\",\n ),\n IntInput(name=\"chunk_size\", display_name=\"Chunk Size\", advanced=True, value=1000),\n MessageTextInput(name=\"client\", display_name=\"Client\", advanced=True),\n MessageTextInput(name=\"deployment\", display_name=\"Deployment\", advanced=True),\n IntInput(name=\"embedding_ctx_length\", display_name=\"Embedding Context Length\", advanced=True, value=1536),\n IntInput(name=\"max_retries\", display_name=\"Max Retries\", value=3, advanced=True),\n DropdownInput(\n name=\"model\",\n display_name=\"Model\",\n advanced=False,\n options=OPENAI_EMBEDDING_MODEL_NAMES,\n value=\"text-embedding-3-small\",\n ),\n DictInput(name=\"model_kwargs\", display_name=\"Model Kwargs\", advanced=True),\n SecretStrInput(name=\"openai_api_key\", display_name=\"OpenAI API Key\", 
value=\"OPENAI_API_KEY\", required=True),\n MessageTextInput(name=\"openai_api_base\", display_name=\"OpenAI API Base\", advanced=True),\n MessageTextInput(name=\"openai_api_type\", display_name=\"OpenAI API Type\", advanced=True),\n MessageTextInput(name=\"openai_api_version\", display_name=\"OpenAI API Version\", advanced=True),\n MessageTextInput(\n name=\"openai_organization\",\n display_name=\"OpenAI Organization\",\n advanced=True,\n ),\n MessageTextInput(name=\"openai_proxy\", display_name=\"OpenAI Proxy\", advanced=True),\n FloatInput(name=\"request_timeout\", display_name=\"Request Timeout\", advanced=True),\n BoolInput(name=\"show_progress_bar\", display_name=\"Show Progress Bar\", advanced=True),\n BoolInput(name=\"skip_empty\", display_name=\"Skip Empty\", advanced=True),\n MessageTextInput(\n name=\"tiktoken_model_name\",\n display_name=\"TikToken Model Name\",\n advanced=True,\n ),\n BoolInput(\n name=\"tiktoken_enable\",\n display_name=\"TikToken Enable\",\n advanced=True,\n value=True,\n info=\"If False, you must have transformers installed.\",\n ),\n IntInput(\n name=\"dimensions\",\n display_name=\"Dimensions\",\n info=\"The number of dimensions the resulting output embeddings should have. 
\"\n \"Only supported by certain models.\",\n advanced=True,\n ),\n ]\n\n def build_embeddings(self) -> Embeddings:\n return OpenAIEmbeddings(\n client=self.client or None,\n model=self.model,\n dimensions=self.dimensions or None,\n deployment=self.deployment or None,\n api_version=self.openai_api_version or None,\n base_url=self.openai_api_base or None,\n openai_api_type=self.openai_api_type or None,\n openai_proxy=self.openai_proxy or None,\n embedding_ctx_length=self.embedding_ctx_length,\n api_key=self.openai_api_key or None,\n organization=self.openai_organization or None,\n allowed_special=\"all\",\n disallowed_special=\"all\",\n chunk_size=self.chunk_size,\n max_retries=self.max_retries,\n timeout=self.request_timeout or None,\n tiktoken_enabled=self.tiktoken_enable,\n tiktoken_model_name=self.tiktoken_model_name or None,\n show_progress_bar=self.show_progress_bar,\n model_kwargs=self.model_kwargs,\n skip_empty=self.skip_empty,\n default_headers=self.default_headers or None,\n default_query=self.default_query or None,\n )\n" + }, + "default_headers": { + "_input_type": "DictInput", + "advanced": true, + "display_name": "Default Headers", + "dynamic": false, + "info": "Default headers to use for the API request.", + "list": false, + "list_add_label": "Add More", + "name": "default_headers", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "type": "dict", + "value": {} + }, + "default_query": { + "_input_type": "DictInput", + "advanced": true, + "display_name": "Default Query", + "dynamic": false, + "info": "Default query parameters to use for the API request.", + "list": false, + "list_add_label": "Add More", + "name": "default_query", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "type": "dict", + "value": {} + }, + "deployment": { + "_input_type": "MessageTextInput", + "advanced": true, + 
"display_name": "Deployment", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "deployment", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "dimensions": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Dimensions", + "dynamic": false, + "info": "The number of dimensions the resulting output embeddings should have. Only supported by certain models.", + "list": false, + "list_add_label": "Add More", + "name": "dimensions", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": "" + }, + "embedding_ctx_length": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Embedding Context Length", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "embedding_ctx_length", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 1536 + }, + "max_retries": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Retries", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "max_retries", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 3 + }, + "model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model", + "dynamic": false, + "info": "", + "name": "model", + "options": [ + "text-embedding-3-small", + "text-embedding-3-large", + "text-embedding-ada-002" + ], + "options_metadata": [], + "placeholder": "", + 
"required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "text-embedding-3-small" + }, + "model_kwargs": { + "_input_type": "DictInput", + "advanced": true, + "display_name": "Model Kwargs", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "model_kwargs", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "type": "dict", + "value": {} + }, + "openai_api_base": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI API Base", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "openai_api_base", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "openai_api_key": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "OpenAI API Key", + "dynamic": false, + "info": "", + "input_types": [], + "load_from_db": false, + "name": "openai_api_key", + "password": true, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "str", + "value": "" + }, + "openai_api_type": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI API Type", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "openai_api_type", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "openai_api_version": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI 
API Version", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "openai_api_version", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "openai_organization": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI Organization", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "openai_organization", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "openai_proxy": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "OpenAI Proxy", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "openai_proxy", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "request_timeout": { + "_input_type": "FloatInput", + "advanced": true, + "display_name": "Request Timeout", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "request_timeout", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "float", + "value": "" + }, + "show_progress_bar": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Show Progress Bar", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "show_progress_bar", + "placeholder": "", + 
"required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "skip_empty": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Skip Empty", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "skip_empty", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "tiktoken_enable": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "TikToken Enable", + "dynamic": false, + "info": "If False, you must have transformers installed.", + "list": false, + "list_add_label": "Add More", + "name": "tiktoken_enable", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "tiktoken_model_name": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "TikToken Model Name", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "tiktoken_model_name", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + }, + "tool_mode": false + }, + "selected_output": "embeddings", + "type": "OpenAIEmbeddings" + }, + "dragging": false, + "height": 320, + "id": "OpenAIEmbeddings-mP45L", + "measured": { + "height": 320, + "width": 320 + }, + "position": { + "x": 1704.8491676318172, + "y": 1879.144249471858 + }, + "positionAbsolute": { + "x": 1690.9220896443658, + "y": 1866.483269483266 + }, + "selected": false, + "type": "genericNode", + "width": 320 + }, + { + "data": { + "id": "note-59mzY", + "node": { + "description": "### 💡 Add your OpenAI 
API key here 👇", + "display_name": "", + "documentation": "", + "template": { + "backgroundColor": "transparent" + } + }, + "type": "note" + }, + "dragging": false, + "height": 324, + "id": "note-59mzY", + "measured": { + "height": 324, + "width": 324 + }, + "position": { + "x": 1692.2322233423606, + "y": 1821.9077961087607 + }, + "positionAbsolute": { + "x": 1692.2322233423606, + "y": 1821.9077961087607 + }, + "selected": false, + "type": "noteNode", + "width": 324 + }, + { + "data": { + "id": "OpenSearchHybrid-XtKoA", + "node": { + "base_classes": [ + "Data", + "DataFrame", + "VectorStore" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.", + "display_name": "OpenSearch (Hybrid)", + "documentation": "", + "edited": true, + "field_order": [ + "docs_metadata", + "opensearch_url", + "index_name", + "engine", + "space_type", + "ef_construction", + "m", + "ingest_data", + "search_query", + "should_cache_vector_store", + "embedding", + "vector_field", + "number_of_results", + "filter_expression", + "auth_mode", + "username", + "password", + "jwt_token", + "jwt_header", + "bearer_prefix", + "use_ssl", + "verify_certs" + ], + "frozen": false, + "icon": "OpenSearch", + "legacy": false, + "metadata": { + "code_hash": "deee3f04cb47", + "dependencies": { + "dependencies": [ + { + "name": "langflow", + "version": "1.5.0.post2" + }, + { + "name": "opensearchpy", + "version": "2.8.0" + } + ], + "total_dependencies": 2 + }, + "module": "custom_components.opensearch_hybrid" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Search Results", + "group_outputs": false, + "hidden": null, + "method": "search_documents", + "name": "search_results", + "options": null, + "required_inputs": null, + "selected": "Data", + "tool_mode": true, + "types": [ + "Data" + ], + "value": "__UNDEFINED__" + 
}, + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "group_outputs": false, + "hidden": null, + "method": "as_dataframe", + "name": "dataframe", + "options": null, + "required_inputs": null, + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Vector Store Connection", + "group_outputs": false, + "hidden": true, + "method": "as_vector_store", + "name": "vectorstoreconnection", + "options": null, + "required_inputs": null, + "selected": "VectorStore", + "tool_mode": true, + "types": [ + "VectorStore" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "auth_mode": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Auth Mode", + "dynamic": false, + "info": "Choose Basic (username/password) or JWT (Bearer token).", + "load_from_db": false, + "name": "auth_mode", + "options": [ + "basic", + "jwt" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "jwt" + }, + "bearer_prefix": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Prefix 'Bearer '", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "bearer_prefix", + "placeholder": "", + "required": false, + "show": false, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": 
false, + "type": "code", + "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any, Dict, List, Optional\n\nfrom langflow.base.vectorstores.model import (\n LCVectorStoreComponent,\n check_cached_vector_store,\n)\nfrom langflow.base.vectorstores.vector_store_connection_decorator import (\n vector_store_connection,\n)\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n TableInput,\n)\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom opensearchpy import OpenSearch, helpers\n\n\n@vector_store_connection\nclass OpenSearchHybridComponent(LCVectorStoreComponent):\n \"\"\"OpenSearch hybrid search: KNN (k=10, boost=0.7) + multi_match (boost=0.3) with optional filters & min_score.\"\"\"\n\n display_name: str = \"OpenSearch (Hybrid)\"\n name: str = \"OpenSearchHybrid\"\n icon: str = \"OpenSearch\"\n description: str = \"Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.\"\n\n # Keys we consider baseline\n default_keys: list[str] = [\n \"opensearch_url\",\n \"index_name\",\n *[\n i.name for i in LCVectorStoreComponent.inputs\n ], # search_query, add_documents, etc.\n \"embedding\",\n \"vector_field\",\n \"number_of_results\",\n \"auth_mode\",\n \"username\",\n \"password\",\n \"jwt_token\",\n \"jwt_header\",\n \"bearer_prefix\",\n \"use_ssl\",\n \"verify_certs\",\n \"filter_expression\",\n \"engine\",\n \"space_type\",\n \"ef_construction\",\n \"m\",\n \"docs_metadata\",\n ]\n\n inputs = [\n TableInput(\n name=\"docs_metadata\",\n display_name=\"Ingestion Metadata\",\n info=\"Key value pairs to be inserted into each ingested document.\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Key name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Value of the metadata\",\n 
},\n ],\n value=[],\n advanced=True,\n ),\n StrInput(\n name=\"opensearch_url\",\n display_name=\"OpenSearch URL\",\n value=\"http://localhost:9200\",\n info=\"URL for your OpenSearch cluster.\",\n ),\n StrInput(\n name=\"index_name\",\n display_name=\"Index Name\",\n value=\"langflow\",\n info=\"The index to search.\",\n ),\n DropdownInput(\n name=\"engine\",\n display_name=\"Engine\",\n options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n value=\"jvector\",\n info=\"Vector search engine to use.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"space_type\",\n display_name=\"Space Type\",\n options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n value=\"l2\",\n info=\"Distance metric for vector similarity.\",\n advanced=True,\n ),\n IntInput(\n name=\"ef_construction\",\n display_name=\"EF Construction\",\n value=512,\n info=\"Size of the dynamic list used during k-NN graph creation.\",\n advanced=True,\n ),\n IntInput(\n name=\"m\",\n display_name=\"M Parameter\",\n value=16,\n info=\"Number of bidirectional links created for each new element.\",\n advanced=True,\n ),\n *LCVectorStoreComponent.inputs, # includes search_query, add_documents, etc.\n HandleInput(\n name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]\n ),\n StrInput(\n name=\"vector_field\",\n display_name=\"Vector Field\",\n value=\"chunk_embedding\",\n advanced=True,\n info=\"Vector field used for KNN.\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Default Size (limit)\",\n value=10,\n advanced=True,\n info=\"Default number of hits when no limit provided in filter_expression.\",\n ),\n MultilineInput(\n name=\"filter_expression\",\n display_name=\"Filter Expression (JSON)\",\n value=\"\",\n info=(\n \"Optional JSON to control filters/limit/score threshold.\\n\"\n \"Accepted shapes:\\n\"\n '1) {\"filter\": [ {\"term\": {\"filename\":\"foo\"}}, {\"terms\":{\"owner\":[\"u1\",\"u2\"]}} ], \"limit\": 10, \"score_threshold\": 1.6 
}\\n'\n '2) Context-style maps: {\"data_sources\":[\"fileA\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"123\"]}\\n'\n \"Placeholders with __IMPOSSIBLE_VALUE__ are ignored.\"\n ),\n ),\n # ----- Auth controls (dynamic) -----\n DropdownInput(\n name=\"auth_mode\",\n display_name=\"Auth Mode\",\n value=\"basic\",\n options=[\"basic\", \"jwt\"],\n info=\"Choose Basic (username/password) or JWT (Bearer token).\",\n real_time_refresh=True,\n advanced=False,\n ),\n StrInput(\n name=\"username\",\n display_name=\"Username\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"password\",\n display_name=\"Password\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"jwt_token\",\n display_name=\"JWT Token\",\n value=\"JWT\",\n load_from_db=True,\n show=True,\n info=\"Paste a valid JWT (sent as a header).\",\n ),\n StrInput(\n name=\"jwt_header\",\n display_name=\"JWT Header Name\",\n value=\"Authorization\",\n show=False,\n advanced=True,\n ),\n BoolInput(\n name=\"bearer_prefix\",\n display_name=\"Prefix 'Bearer '\",\n value=True,\n show=False,\n advanced=True,\n ),\n # ----- TLS -----\n BoolInput(name=\"use_ssl\", display_name=\"Use SSL\", value=True, advanced=True),\n BoolInput(\n name=\"verify_certs\",\n display_name=\"Verify Certificates\",\n value=False,\n advanced=True,\n ),\n ]\n\n # ---------- helper functions for index management ----------\n def _default_text_mapping(\n self,\n dim: int,\n engine: str = \"jvector\",\n space_type: str = \"l2\",\n ef_search: int = 512,\n ef_construction: int = 100,\n m: int = 16,\n vector_field: str = \"vector_field\",\n ) -> Dict[str, Any]:\n \"\"\"For Approximate k-NN Search, this is the default mapping to create index.\"\"\"\n return {\n \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n \"mappings\": {\n \"properties\": {\n vector_field: {\n \"type\": \"knn_vector\",\n \"dimension\": dim,\n \"method\": {\n \"name\": \"disk_ann\",\n \"space_type\": 
space_type,\n \"engine\": engine,\n \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n },\n }\n }\n },\n }\n\n def _validate_aoss_with_engines(self, is_aoss: bool, engine: str) -> None:\n \"\"\"Validate AOSS with the engine.\"\"\"\n if is_aoss and engine != \"nmslib\" and engine != \"faiss\":\n raise ValueError(\n \"Amazon OpenSearch Service Serverless only \"\n \"supports `nmslib` or `faiss` engines\"\n )\n\n def _is_aoss_enabled(self, http_auth: Any) -> bool:\n \"\"\"Check if the service is http_auth is set as `aoss`.\"\"\"\n if (\n http_auth is not None\n and hasattr(http_auth, \"service\")\n and http_auth.service == \"aoss\"\n ):\n return True\n return False\n\n def _bulk_ingest_embeddings(\n self,\n client: OpenSearch,\n index_name: str,\n embeddings: List[List[float]],\n texts: List[str],\n metadatas: Optional[List[dict]] = None,\n ids: Optional[List[str]] = None,\n vector_field: str = \"vector_field\",\n text_field: str = \"text\",\n mapping: Optional[Dict] = None,\n max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,\n is_aoss: bool = False,\n ) -> List[str]:\n \"\"\"Bulk Ingest Embeddings into given index.\"\"\"\n if not mapping:\n mapping = dict()\n\n requests = []\n return_ids = []\n\n for i, text in enumerate(texts):\n metadata = metadatas[i] if metadatas else {}\n _id = ids[i] if ids else str(uuid.uuid4())\n request = {\n \"_op_type\": \"index\",\n \"_index\": index_name,\n vector_field: embeddings[i],\n text_field: text,\n **metadata,\n }\n if is_aoss:\n request[\"id\"] = _id\n else:\n request[\"_id\"] = _id\n requests.append(request)\n return_ids.append(_id)\n self.log(metadatas[i])\n helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n return return_ids\n\n # ---------- auth / client ----------\n def _build_auth_kwargs(self) -> Dict[str, Any]:\n mode = (self.auth_mode or \"basic\").strip().lower()\n if mode == \"jwt\":\n token = (self.jwt_token or \"\").strip()\n if not token:\n raise ValueError(\"Auth Mode is 'jwt' but 
no jwt_token was provided.\")\n header_name = (self.jwt_header or \"Authorization\").strip()\n header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n return {\"headers\": {header_name: header_value}}\n user = (self.username or \"\").strip()\n pwd = (self.password or \"\").strip()\n if not user or not pwd:\n raise ValueError(\"Auth Mode is 'basic' but username/password are missing.\")\n return {\"http_auth\": (user, pwd)}\n\n def build_client(self) -> OpenSearch:\n auth_kwargs = self._build_auth_kwargs()\n return OpenSearch(\n hosts=[self.opensearch_url],\n use_ssl=self.use_ssl,\n verify_certs=self.verify_certs,\n ssl_assert_hostname=False,\n ssl_show_warn=False,\n **auth_kwargs,\n )\n\n @check_cached_vector_store\n def build_vector_store(self) -> OpenSearch:\n # Return raw OpenSearch client as our “vector store.”\n self.log(self.ingest_data)\n client = self.build_client()\n self._add_documents_to_vector_store(client=client)\n return client\n\n # ---------- ingest ----------\n def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n # Convert DataFrame to Data if needed using parent's method\n self.ingest_data = self._prepare_ingest_data()\n\n docs = self.ingest_data or []\n if not docs:\n self.log(\"No documents to ingest.\")\n return\n\n # Extract texts and metadata from documents\n texts = []\n metadatas = []\n # Process docs_metadata table input into a dict\n additional_metadata = {}\n if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n for item in self.docs_metadata:\n if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n additional_metadata[item[\"key\"]] = item[\"value\"]\n\n for doc_obj in docs:\n data_copy = json.loads(doc_obj.model_dump_json())\n text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n texts.append(text)\n\n # Merge additional metadata from table input\n data_copy.update(additional_metadata)\n\n metadatas.append(data_copy)\n self.log(metadatas)\n if not self.embedding:\n raise 
ValueError(\"Embedding handle is required to embed documents.\")\n\n # Generate embeddings\n vectors = self.embedding.embed_documents(texts)\n\n if not vectors:\n self.log(\"No vectors generated from documents.\")\n return\n\n # Get vector dimension for mapping\n dim = len(vectors[0]) if vectors else 768 # default fallback\n\n # Check for AOSS\n auth_kwargs = self._build_auth_kwargs()\n is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n # Validate engine with AOSS\n engine = getattr(self, \"engine\", \"jvector\")\n self._validate_aoss_with_engines(is_aoss, engine)\n\n # Create mapping with proper KNN settings\n space_type = getattr(self, \"space_type\", \"l2\")\n ef_construction = getattr(self, \"ef_construction\", 512)\n m = getattr(self, \"m\", 16)\n\n mapping = self._default_text_mapping(\n dim=dim,\n engine=engine,\n space_type=space_type,\n ef_construction=ef_construction,\n m=m,\n vector_field=self.vector_field,\n )\n\n self.log(\n f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\"\n )\n\n # Use the LangChain-style bulk ingestion\n return_ids = self._bulk_ingest_embeddings(\n client=client,\n index_name=self.index_name,\n embeddings=vectors,\n texts=texts,\n metadatas=metadatas,\n vector_field=self.vector_field,\n text_field=\"text\",\n mapping=mapping,\n is_aoss=is_aoss,\n )\n self.log(metadatas)\n\n self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n # ---------- helpers for filters ----------\n def _is_placeholder_term(self, term_obj: dict) -> bool:\n # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n def _coerce_filter_clauses(self, filter_obj: dict | None) -> List[dict]:\n \"\"\"\n Accepts either:\n A) {\"filter\":[ ...term/terms objects... 
], \"limit\":..., \"score_threshold\":...}\n B) Context-style: {\"data_sources\":[...], \"document_types\":[...], \"owners\":[...]}\n Returns a list of OS filter clauses (term/terms), skipping placeholders and empty terms.\n \"\"\"\n\n if not filter_obj:\n return []\n\n # If it’s a string, try to parse it once\n if isinstance(filter_obj, str):\n try:\n filter_obj = json.loads(filter_obj)\n except Exception:\n # Not valid JSON → treat as no filters\n return []\n\n # Case A: already an explicit list/dict under \"filter\"\n if \"filter\" in filter_obj:\n raw = filter_obj[\"filter\"]\n if isinstance(raw, dict):\n raw = [raw]\n clauses: List[dict] = []\n for f in raw or []:\n if (\n \"term\" in f\n and isinstance(f[\"term\"], dict)\n and not self._is_placeholder_term(f[\"term\"])\n ):\n clauses.append(f)\n elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n field, vals = next(iter(f[\"terms\"].items()))\n if isinstance(vals, list) and len(vals) > 0:\n clauses.append(f)\n return clauses\n\n # Case B: convert context-style maps into clauses\n field_mapping = {\n \"data_sources\": \"filename\",\n \"document_types\": \"mimetype\",\n \"owners\": \"owner\",\n }\n clauses: List[dict] = []\n for k, values in filter_obj.items():\n if not isinstance(values, list):\n continue\n field = field_mapping.get(k, k)\n if len(values) == 0:\n # Match-nothing placeholder (kept to mirror your tool semantics)\n clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n elif len(values) == 1:\n if values[0] != \"__IMPOSSIBLE_VALUE__\":\n clauses.append({\"term\": {field: values[0]}})\n else:\n clauses.append({\"terms\": {field: values}})\n return clauses\n\n # ---------- search (single hybrid path matching your tool) ----------\n def search(self, query: str | None = None) -> list[dict[str, Any]]:\n logger.info(self.ingest_data)\n client = self.build_client()\n q = (query or \"\").strip()\n\n # Parse optional filter expression (can be either A or B shape; see 
_coerce_filter_clauses)\n filter_obj = None\n if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n try:\n filter_obj = json.loads(self.filter_expression)\n except json.JSONDecodeError as e:\n raise ValueError(f\"Invalid filter_expression JSON: {e}\") from e\n\n if not self.embedding:\n raise ValueError(\n \"Embedding is required to run hybrid search (KNN + keyword).\"\n )\n\n # Embed the query\n vec = self.embedding.embed_query(q)\n\n # Build filter clauses (accept both shapes)\n clauses = self._coerce_filter_clauses(filter_obj)\n\n # Respect the tool's limit/threshold defaults\n limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n # Build the same hybrid body as your SearchService\n body = {\n \"query\": {\n \"bool\": {\n \"should\": [\n {\n \"knn\": {\n self.vector_field: {\n \"vector\": vec,\n \"k\": 10, # fixed to match the tool\n \"boost\": 0.7,\n }\n }\n },\n {\n \"multi_match\": {\n \"query\": q,\n \"fields\": [\"text^2\", \"filename^1.5\"],\n \"type\": \"best_fields\",\n \"fuzziness\": \"AUTO\",\n \"boost\": 0.3,\n }\n },\n ],\n \"minimum_should_match\": 1,\n }\n },\n \"aggs\": {\n \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n },\n \"_source\": [\n \"filename\",\n \"mimetype\",\n \"page\",\n \"text\",\n \"source_url\",\n \"owner\",\n \"allowed_users\",\n \"allowed_groups\",\n ],\n \"size\": limit,\n }\n if clauses:\n body[\"query\"][\"bool\"][\"filter\"] = clauses\n\n if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n # top-level min_score (matches your tool)\n body[\"min_score\"] = score_threshold\n\n resp = client.search(index=self.index_name, body=body)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": 
hit[\"_source\"].get(\"text\", \"\"),\n \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n \"score\": hit.get(\"_score\"),\n }\n for hit in hits\n ]\n\n def search_documents(self) -> list[Data]:\n try:\n raw = self.search(self.search_query or \"\")\n return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n self.log(self.ingest_data)\n except Exception as e:\n self.log(f\"search_documents error: {e}\")\n raise\n\n # -------- dynamic UI handling (auth switch) --------\n async def update_build_config(\n self, build_config: dict, field_value: str, field_name: str | None = None\n ) -> dict:\n try:\n if field_name == \"auth_mode\":\n mode = (field_value or \"basic\").strip().lower()\n is_basic = mode == \"basic\"\n is_jwt = mode == \"jwt\"\n\n build_config[\"username\"][\"show\"] = is_basic\n build_config[\"password\"][\"show\"] = is_basic\n\n build_config[\"jwt_token\"][\"show\"] = is_jwt\n build_config[\"jwt_header\"][\"show\"] = is_jwt\n build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n build_config[\"username\"][\"required\"] = is_basic\n build_config[\"password\"][\"required\"] = is_basic\n\n build_config[\"jwt_token\"][\"required\"] = is_jwt\n build_config[\"jwt_header\"][\"required\"] = is_jwt\n build_config[\"bearer_prefix\"][\"required\"] = False\n\n if is_basic:\n build_config[\"jwt_token\"][\"value\"] = \"\"\n\n return build_config\n\n return build_config\n\n except Exception as e:\n self.log(f\"update_build_config error: {e}\")\n return build_config\n" + }, + "docs_metadata": { + "_input_type": "TableInput", + "advanced": true, + "display_name": "Ingestion Metadata", + "dynamic": false, + "info": "Key value pairs to be inserted into each ingested document.", + "is_list": true, + "list_add_label": "Add More", + "name": "docs_metadata", + "placeholder": "", + "required": false, + "show": true, + "table_icon": "Table", + "table_schema": { + "columns": [ + { + "default": "None", + "description": "Key name", + 
"disable_edit": false, + "display_name": "Key", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "key", + "sortable": true, + "type": "str" + }, + { + "default": "None", + "description": "Value of the metadata", + "disable_edit": false, + "display_name": "Value", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "value", + "sortable": true, + "type": "str" + } + ] + }, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "trigger_icon": "Table", + "trigger_text": "Open table", + "type": "table", + "value": [] + }, + "ef_construction": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "EF Construction", + "dynamic": false, + "info": "Size of the dynamic list used during k-NN graph creation.", + "list": false, + "list_add_label": "Add More", + "name": "ef_construction", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 512 + }, + "embedding": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Embedding", + "dynamic": false, + "info": "", + "input_types": [ + "Embeddings" + ], + "list": false, + "list_add_label": "Add More", + "name": "embedding", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "engine": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Engine", + "dynamic": false, + "info": "Vector search engine to use.", + "load_from_db": false, + "name": "engine", + "options": [ + "jvector", + "nmslib", + "faiss", + "lucene" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + 
"value": "nmslib" + }, + "filter_expression": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Filter Expression (JSON)", + "dynamic": false, + "info": "Optional JSON to control filters/limit/score threshold.\nAccepted shapes:\n1) {\"filter\": [ {\"term\": {\"filename\":\"foo\"}}, {\"terms\":{\"owner\":[\"u1\",\"u2\"]}} ], \"limit\": 10, \"score_threshold\": 1.6 }\n2) Context-style maps: {\"data_sources\":[\"fileA\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"123\"]}\nPlaceholders with __IMPOSSIBLE_VALUE__ are ignored.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "filter_expression", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "index_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Index Name", + "dynamic": false, + "info": "The index to search.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "index_name", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "documents" + }, + "ingest_data": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Ingest Data", + "dynamic": false, + "info": "", + "input_types": [ + "Data", + "DataFrame" + ], + "list": true, + "list_add_label": "Add More", + "name": "ingest_data", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "jwt_header": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "JWT Header Name", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + 
"name": "jwt_header", + "placeholder": "", + "required": false, + "show": false, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Authorization" + }, + "jwt_token": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "JWT Token", + "dynamic": false, + "info": "Paste a valid JWT (sent as a header).", + "input_types": [], + "load_from_db": false, + "name": "jwt_token", + "password": true, + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "" + }, + "m": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "M Parameter", + "dynamic": false, + "info": "Number of bidirectional links created for each new element.", + "list": false, + "list_add_label": "Add More", + "name": "m", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 16 + }, + "number_of_results": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Default Size (limit)", + "dynamic": false, + "info": "Default number of hits when no limit provided in filter_expression.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "number_of_results", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 15 + }, + "opensearch_url": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "OpenSearch URL", + "dynamic": false, + "info": "URL for your OpenSearch cluster.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "opensearch_url", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "https://opensearch:9200" + }, + "password": { + "_input_type": 
"SecretStrInput", + "advanced": false, + "display_name": "Password", + "dynamic": false, + "info": "", + "input_types": [], + "load_from_db": false, + "name": "password", + "password": true, + "placeholder": "", + "required": false, + "show": false, + "title_case": false, + "type": "str", + "value": "" + }, + "search_query": { + "_input_type": "QueryInput", + "advanced": false, + "display_name": "Search Query", + "dynamic": false, + "info": "Enter a query to run a similarity search.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "search_query", + "placeholder": "Enter a query...", + "required": false, + "show": true, + "title_case": false, + "tool_mode": true, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "query", + "value": "" + }, + "should_cache_vector_store": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Cache Vector Store", + "dynamic": false, + "info": "If True, the vector store will be cached for the current build of the component. 
This is useful for components that have multiple output methods and want to share the same vector store.", + "list": false, + "list_add_label": "Add More", + "name": "should_cache_vector_store", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "space_type": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Space Type", + "dynamic": false, + "info": "Distance metric for vector similarity.", + "name": "space_type", + "options": [ + "l2", + "l1", + "cosinesimil", + "linf", + "innerproduct" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "l2" + }, + "use_ssl": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Use SSL", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "use_ssl", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "username": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Username", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "username", + "placeholder": "", + "required": false, + "show": false, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "admin" + }, + "vector_field": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Vector Field", + "dynamic": false, + "info": "Vector field used for KNN.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "vector_field", + "placeholder": "", + "required": false, + "show": true, + 
"title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "chunk_embedding" + }, + "verify_certs": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Verify Certificates", + "dynamic": false, + "info": "", + "list": false, + "list_add_label": "Add More", + "name": "verify_certs", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + } + }, + "tool_mode": false + }, + "selected_output": "search_results", + "showNode": true, + "type": "OpenSearchHybrid" + }, + "dragging": false, + "id": "OpenSearchHybrid-XtKoA", + "measured": { + "height": 765, + "width": 320 + }, + "position": { + "x": 2218.9287723423276, + "y": 1332.2598463956504 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "DoclingRemote-78KoX", + "node": { + "base_classes": [ + "DataFrame" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Uses Docling to process input documents connecting to your instance of Docling Serve.", + "display_name": "Docling Serve", + "documentation": "https://docling-project.github.io/docling/", + "edited": false, + "field_order": [ + "path", + "file_path", + "separator", + "silent_errors", + "delete_server_file_after_processing", + "ignore_unsupported_extensions", + "ignore_unspecified_files", + "api_url", + "max_concurrency", + "max_poll_timeout", + "api_headers", + "docling_serve_opts" + ], + "frozen": false, + "icon": "Docling", + "legacy": false, + "lf_version": "1.6.0", + "metadata": { + "code_hash": "930312ffe40c", + "dependencies": { + "dependencies": [ + { + "name": "httpx", + "version": "0.28.1" + }, + { + "name": "docling_core", + "version": "2.45.0" + }, + { + "name": "pydantic", + "version": "2.10.6" + }, + { + "name": "lfx", + "version": null + } + ], + "total_dependencies": 4 + }, + "module": 
"lfx.components.docling.docling_remote.DoclingRemoteComponent" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Files", + "group_outputs": false, + "method": "load_files", + "name": "dataframe", + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "api_headers": { + "_input_type": "NestedDictInput", + "advanced": true, + "display_name": "HTTP headers", + "dynamic": false, + "info": "Optional dictionary of additional headers required for connecting to Docling Serve.", + "list": false, + "list_add_label": "Add More", + "name": "api_headers", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "NestedDict", + "value": {} + }, + "api_url": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Server address", + "dynamic": false, + "info": "URL of the Docling Serve instance.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "api_url", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "http://localhost:5001" + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import base64\nimport time\nfrom concurrent.futures import Future, ThreadPoolExecutor\nfrom pathlib import Path\nfrom typing import Any\n\nimport httpx\nfrom docling_core.types.doc import DoclingDocument\nfrom pydantic import ValidationError\n\nfrom lfx.base.data import 
BaseFileComponent\nfrom lfx.inputs import IntInput, NestedDictInput, StrInput\nfrom lfx.inputs.inputs import FloatInput\nfrom lfx.schema import Data\n\n\nclass DoclingRemoteComponent(BaseFileComponent):\n display_name = \"Docling Serve\"\n description = \"Uses Docling to process input documents connecting to your instance of Docling Serve.\"\n documentation = \"https://docling-project.github.io/docling/\"\n trace_type = \"tool\"\n icon = \"Docling\"\n name = \"DoclingRemote\"\n\n MAX_500_RETRIES = 5\n\n # https://docling-project.github.io/docling/usage/supported_formats/\n VALID_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"csv\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"docx\",\n \"htm\",\n \"html\",\n \"jpeg\",\n \"json\",\n \"md\",\n \"pdf\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"txt\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"xml\",\n \"webp\",\n ]\n\n inputs = [\n *BaseFileComponent.get_base_inputs(),\n StrInput(\n name=\"api_url\",\n display_name=\"Server address\",\n info=\"URL of the Docling Serve instance.\",\n required=True,\n ),\n IntInput(\n name=\"max_concurrency\",\n display_name=\"Concurrency\",\n info=\"Maximum number of concurrent requests for the server.\",\n advanced=True,\n value=2,\n ),\n FloatInput(\n name=\"max_poll_timeout\",\n display_name=\"Maximum poll time\",\n info=\"Maximum waiting time for the document conversion to complete.\",\n advanced=True,\n value=3600,\n ),\n NestedDictInput(\n name=\"api_headers\",\n display_name=\"HTTP headers\",\n advanced=True,\n required=False,\n info=(\"Optional dictionary of additional headers required for connecting to Docling Serve.\"),\n ),\n NestedDictInput(\n name=\"docling_serve_opts\",\n display_name=\"Docling options\",\n advanced=True,\n required=False,\n info=(\n \"Optional dictionary of additional options. 
\"\n \"See https://github.com/docling-project/docling-serve/blob/main/docs/usage.md for more information.\"\n ),\n ),\n ]\n\n outputs = [\n *BaseFileComponent.get_base_outputs(),\n ]\n\n def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:\n base_url = f\"{self.api_url}/v1\"\n\n def _convert_document(client: httpx.Client, file_path: Path, options: dict[str, Any]) -> Data | None:\n encoded_doc = base64.b64encode(file_path.read_bytes()).decode()\n payload = {\n \"options\": options,\n \"sources\": [{\"kind\": \"file\", \"base64_string\": encoded_doc, \"filename\": file_path.name}],\n }\n\n response = client.post(f\"{base_url}/convert/source/async\", json=payload)\n response.raise_for_status()\n task = response.json()\n\n http_failures = 0\n retry_status_start = 500\n retry_status_end = 600\n start_wait_time = time.monotonic()\n while task[\"task_status\"] not in (\"success\", \"failure\"):\n # Check if processing exceeds the maximum poll timeout\n processing_time = time.monotonic() - start_wait_time\n if processing_time >= self.max_poll_timeout:\n msg = (\n f\"Processing time {processing_time=} exceeds the maximum poll timeout {self.max_poll_timeout=}.\"\n \"Please increase the max_poll_timeout parameter or review why the processing \"\n \"takes long on the server.\"\n )\n self.log(msg)\n raise RuntimeError(msg)\n\n # Call for a new status update\n time.sleep(2)\n response = client.get(f\"{base_url}/status/poll/{task['task_id']}\")\n\n # Check if the status call gets into 5xx errors and retry\n if retry_status_start <= response.status_code < retry_status_end:\n http_failures += 1\n if http_failures > self.MAX_500_RETRIES:\n self.log(f\"The status requests got a http response {response.status_code} too many times.\")\n return None\n continue\n\n # Update task status\n task = response.json()\n\n result_resp = client.get(f\"{base_url}/result/{task['task_id']}\")\n result_resp.raise_for_status()\n result = 
result_resp.json()\n\n if \"json_content\" not in result[\"document\"] or result[\"document\"][\"json_content\"] is None:\n self.log(\"No JSON DoclingDocument found in the result.\")\n return None\n\n try:\n doc = DoclingDocument.model_validate(result[\"document\"][\"json_content\"])\n return Data(data={\"doc\": doc, \"file_path\": str(file_path)})\n except ValidationError as e:\n self.log(f\"Error validating the document. {e}\")\n return None\n\n docling_options = {\n \"to_formats\": [\"json\"],\n \"image_export_mode\": \"placeholder\",\n **(self.docling_serve_opts or {}),\n }\n\n processed_data: list[Data | None] = []\n with (\n httpx.Client(headers=self.api_headers) as client,\n ThreadPoolExecutor(max_workers=self.max_concurrency) as executor,\n ):\n futures: list[tuple[int, Future]] = []\n for i, file in enumerate(file_list):\n if file.path is None:\n processed_data.append(None)\n continue\n\n futures.append((i, executor.submit(_convert_document, client, file.path, docling_options)))\n\n for _index, future in futures:\n try:\n result_data = future.result()\n processed_data.append(result_data)\n except (httpx.HTTPStatusError, httpx.RequestError, KeyError, ValueError) as exc:\n self.log(f\"Docling remote processing failed: {exc}\")\n raise\n\n return self.rollup_data(file_list, processed_data)\n" + }, + "delete_server_file_after_processing": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Delete Server File After Processing", + "dynamic": false, + "info": "If true, the Server File Path will be deleted after processing.", + "list": false, + "list_add_label": "Add More", + "name": "delete_server_file_after_processing", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "docling_serve_opts": { + "_input_type": "NestedDictInput", + "advanced": false, + "display_name": "Docling options", + "dynamic": false, + "info": 
"Optional dictionary of additional options. See https://github.com/docling-project/docling-serve/blob/main/docs/usage.md for more information.", + "list": false, + "list_add_label": "Add More", + "name": "docling_serve_opts", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "NestedDict", + "value": { + "do_ocr": false + } + }, + "file_path": { + "_input_type": "HandleInput", + "advanced": true, + "display_name": "Server File Path", + "dynamic": false, + "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", + "input_types": [ + "Data", + "Message" + ], + "list": true, + "list_add_label": "Add More", + "name": "file_path", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "ignore_unspecified_files": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Ignore Unspecified Files", + "dynamic": false, + "info": "If true, Data with no 'file_path' property will be ignored.", + "list": false, + "list_add_label": "Add More", + "name": "ignore_unspecified_files", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "ignore_unsupported_extensions": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Ignore Unsupported Extensions", + "dynamic": false, + "info": "If true, files with unsupported extensions will not be processed.", + "list": false, + "list_add_label": "Add More", + "name": "ignore_unsupported_extensions", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + 
"max_concurrency": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Concurrency", + "dynamic": false, + "info": "Maximum number of concurrent requests for the server.", + "list": false, + "list_add_label": "Add More", + "name": "max_concurrency", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 2 + }, + "max_poll_timeout": { + "_input_type": "FloatInput", + "advanced": true, + "display_name": "Maximum poll time", + "dynamic": false, + "info": "Maximum waiting time for the document conversion to complete.", + "list": false, + "list_add_label": "Add More", + "name": "max_poll_timeout", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "float", + "value": 3600 + }, + "path": { + "_input_type": "FileInput", + "advanced": false, + "display_name": "Files", + "dynamic": false, + "fileTypes": [ + "adoc", + "asciidoc", + "asc", + "bmp", + "csv", + "dotx", + "dotm", + "docm", + "docx", + "htm", + "html", + "jpeg", + "json", + "md", + "pdf", + "png", + "potx", + "ppsx", + "pptm", + "potm", + "ppsm", + "pptx", + "tiff", + "txt", + "xls", + "xlsx", + "xhtml", + "xml", + "webp", + "zip", + "tar", + "tgz", + "bz2", + "gz" + ], + "file_path": [], + "info": "Supported file extensions: adoc, asciidoc, asc, bmp, csv, dotx, dotm, docm, docx, htm, html, jpeg, json, md, pdf, png, potx, ppsx, pptm, potm, ppsm, pptx, tiff, txt, xls, xlsx, xhtml, xml, webp; optionally bundled in file extensions: zip, tar, tgz, bz2, gz", + "list": true, + "list_add_label": "Add More", + "name": "path", + "placeholder": "", + "required": false, + "show": true, + "temp_file": false, + "title_case": false, + "trace_as_metadata": true, + "type": "file", + "value": "" + }, + "separator": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + 
"info": "Specify the separator to use between multiple outputs in Message format.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "separator", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "\n\n" + }, + "silent_errors": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Silent Errors", + "dynamic": false, + "info": "If true, errors will not raise an exception.", + "list": false, + "list_add_label": "Add More", + "name": "silent_errors", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "DoclingRemote" + }, + "dragging": false, + "id": "DoclingRemote-78KoX", + "measured": { + "height": 475, + "width": 320 + }, + "position": { + "x": 974.2998232996713, + "y": 1337.9345348080217 + }, + "selected": true, + "type": "genericNode" + }, + { + "data": { + "id": "ExportDoclingDocument-xFoCI", + "node": { + "base_classes": [ + "Data", + "DataFrame" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Export DoclingDocument to markdown, html or other formats.", + "display_name": "Export DoclingDocument", + "documentation": "https://docling-project.github.io/docling/", + "edited": false, + "field_order": [ + "data_inputs", + "export_format", + "image_mode", + "md_image_placeholder", + "md_page_break_placeholder", + "doc_key" + ], + "frozen": false, + "icon": "Docling", + "legacy": false, + "lf_version": "1.6.0", + "metadata": { + "code_hash": "4de16ddd37ac", + "dependencies": { + "dependencies": [ + { + "name": "docling_core", + "version": "2.45.0" + }, + { + "name": "lfx", + "version": null + } + ], + "total_dependencies": 2 + }, + "module": 
"lfx.components.docling.export_docling_document.ExportDoclingDocumentComponent" + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Exported data", + "group_outputs": false, + "method": "export_document", + "name": "data", + "selected": "Data", + "tool_mode": true, + "types": [ + "Data" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "group_outputs": false, + "method": "as_dataframe", + "name": "dataframe", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from typing import Any\n\nfrom docling_core.types.doc import ImageRefMode\n\nfrom lfx.base.data.docling_utils import extract_docling_documents\nfrom lfx.custom import Component\nfrom lfx.io import DropdownInput, HandleInput, MessageTextInput, Output, StrInput\nfrom lfx.schema import Data, DataFrame\n\n\nclass ExportDoclingDocumentComponent(Component):\n display_name: str = \"Export DoclingDocument\"\n description: str = \"Export DoclingDocument to markdown, html or other formats.\"\n documentation = \"https://docling-project.github.io/docling/\"\n icon = \"Docling\"\n name = \"ExportDoclingDocument\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with documents to export.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n DropdownInput(\n name=\"export_format\",\n display_name=\"Export format\",\n options=[\"Markdown\", \"HTML\", \"Plaintext\", \"DocTags\"],\n info=\"Select the export 
format to convert the input.\",\n value=\"Markdown\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"image_mode\",\n display_name=\"Image export mode\",\n options=[\"placeholder\", \"embedded\"],\n info=(\n \"Specify how images are exported in the output. Placeholder will replace the images with a string, \"\n \"whereas Embedded will include them as base64 encoded images.\"\n ),\n value=\"placeholder\",\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"\",\n advanced=True,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder betweek pages in the markdown output.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Exported data\", name=\"data\", method=\"export_document\"),\n Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"as_dataframe\"),\n ]\n\n def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:\n if field_name == \"export_format\" and field_value == \"Markdown\":\n build_config[\"md_image_placeholder\"][\"show\"] = True\n build_config[\"md_page_break_placeholder\"][\"show\"] = True\n build_config[\"image_mode\"][\"show\"] = True\n elif field_name == \"export_format\" and field_value == \"HTML\":\n build_config[\"md_image_placeholder\"][\"show\"] = False\n build_config[\"md_page_break_placeholder\"][\"show\"] = False\n build_config[\"image_mode\"][\"show\"] = True\n elif field_name == \"export_format\" and field_value in {\"Plaintext\", \"DocTags\"}:\n build_config[\"md_image_placeholder\"][\"show\"] = False\n build_config[\"md_page_break_placeholder\"][\"show\"] = False\n 
build_config[\"image_mode\"][\"show\"] = False\n\n return build_config\n\n def export_document(self) -> list[Data]:\n documents = extract_docling_documents(self.data_inputs, self.doc_key)\n\n results: list[Data] = []\n try:\n image_mode = ImageRefMode(self.image_mode)\n for doc in documents:\n content = \"\"\n if self.export_format == \"Markdown\":\n content = doc.export_to_markdown(\n image_mode=image_mode,\n image_placeholder=self.md_image_placeholder,\n page_break_placeholder=self.md_page_break_placeholder,\n )\n elif self.export_format == \"HTML\":\n content = doc.export_to_html(image_mode=image_mode)\n elif self.export_format == \"Plaintext\":\n content = doc.export_to_text()\n elif self.export_format == \"DocTags\":\n content = doc.export_to_doctags()\n\n results.append(Data(text=content))\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n return results\n\n def as_dataframe(self) -> DataFrame:\n return DataFrame(self.export_document())\n" + }, + "data_inputs": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Data or DataFrame", + "dynamic": false, + "info": "The data with documents to export.", + "input_types": [ + "Data", + "DataFrame" + ], + "list": false, + "list_add_label": "Add More", + "name": "data_inputs", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "doc_key": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Doc Key", + "dynamic": false, + "info": "The key to use for the DoclingDocument column.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "doc_key", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "doc" + }, + "export_format": { + 
"_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Export format", + "dynamic": false, + "info": "Select the export format to convert the input.", + "name": "export_format", + "options": [ + "Markdown", + "HTML", + "Plaintext", + "DocTags" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Markdown" + }, + "image_mode": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Image export mode", + "dynamic": false, + "info": "Specify how images are exported in the output. Placeholder will replace the images with a string, whereas Embedded will include them as base64 encoded images.", + "name": "image_mode", + "options": [ + "placeholder", + "embedded" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "placeholder" + }, + "md_image_placeholder": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Image placeholder", + "dynamic": false, + "info": "Specify the image placeholder for markdown exports.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "md_image_placeholder", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "md_page_break_placeholder": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Page break placeholder", + "dynamic": false, + "info": "Add this placeholder betweek pages in the markdown output.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": 
"md_page_break_placeholder", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + }, + "tool_mode": false + }, + "selected_output": "data", + "showNode": true, + "type": "ExportDoclingDocument" + }, + "dragging": false, + "id": "ExportDoclingDocument-xFoCI", + "measured": { + "height": 347, + "width": 320 + }, + "position": { + "x": 1354.7013688969873, + "y": 1365.2986945152204 + }, + "selected": false, + "type": "genericNode" + } + ], + "viewport": { + "x": -708.9707113557265, + "y": -965.7967428241175, + "zoom": 0.7967811989815704 + } + }, + "description": "Load your data for chat context with Retrieval Augmented Generation.", + "endpoint_name": null, + "id": "1402618b-e6d1-4ff2-9a11-d6ce71186915", + "is_component": false, + "last_tested_version": "1.6.0", + "name": "OpenSearch Ingestion Flow Docling Serve", + "tags": [ + "openai", + "astradb", + "rag", + "q-a" + ] +} \ No newline at end of file From febebd7f77a70e6a47d97b76d3784f421fa60368 Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 23 Sep 2025 12:12:47 -0400 Subject: [PATCH 08/19] init --- docs/docs/get-started/intro.mdx | 48 ----------------------- docs/docs/get-started/what-is-openrag.mdx | 10 +++++ docs/sidebars.js | 2 +- 3 files changed, 11 insertions(+), 49 deletions(-) delete mode 100644 docs/docs/get-started/intro.mdx create mode 100644 docs/docs/get-started/what-is-openrag.mdx diff --git a/docs/docs/get-started/intro.mdx b/docs/docs/get-started/intro.mdx deleted file mode 100644 index 22763874..00000000 --- a/docs/docs/get-started/intro.mdx +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: What is OpenRAG? -slug: / ---- - -# OpenRAG Introduction - -Let's discover **Docusaurus in less than 5 minutes**. - -## Getting Started - -Get started by **creating a new site**. 
- -Or **try Docusaurus immediately** with **[docusaurus.new](https://docusaurus.new)**. - -### What you'll need - -- [Node.js](https://nodejs.org/en/download/) version 18.0 or above: - - When installing Node.js, you are recommended to check all checkboxes related to dependencies. - -## Generate a new site - -Generate a new Docusaurus site using the **classic template**. - -The classic template will automatically be added to your project after you run the command: - -```bash -npm init docusaurus@latest my-website classic -``` - -You can type this command into Command Prompt, Powershell, Terminal, or any other integrated terminal of your code editor. - -The command also installs all necessary dependencies you need to run Docusaurus. - -## Start your site - -Run the development server: - -```bash -cd my-website -npm run start -``` - -The `cd` command changes the directory you're working with. In order to work with your newly created Docusaurus site, you'll need to navigate the terminal there. - -The `npm run start` command builds your website locally and serves it through a development server, ready for you to view at http://localhost:3000/. - -Open `docs/intro.md` (this page) and edit some lines: the site **reloads automatically** and displays your changes. diff --git a/docs/docs/get-started/what-is-openrag.mdx b/docs/docs/get-started/what-is-openrag.mdx new file mode 100644 index 00000000..f5b5eef7 --- /dev/null +++ b/docs/docs/get-started/what-is-openrag.mdx @@ -0,0 +1,10 @@ +--- +title: What is OpenRAG? 
+slug: /what-is-openrag +--- + +OpenRAG exists at the confluence of three powerful open-source projects: + +* Langflow for agentic workflows +* OpenSearch for vector databases +* Docling for data ingestion from common sources like OneDrive, Google Drive, and AWS \ No newline at end of file diff --git a/docs/sidebars.js b/docs/sidebars.js index 51a4ddc3..568989e5 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -22,7 +22,7 @@ const sidebars = { items: [ { type: "doc", - id: "get-started/intro", + id: "get-started/what-is-openrag", label: "Introduction" }, { From a8c8383259dbcb19d4dd51369dd3e56508de17ec Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 23 Sep 2025 13:21:33 -0400 Subject: [PATCH 09/19] what-is-openrag-init --- docs/docs/get-started/what-is-openrag.mdx | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/docs/get-started/what-is-openrag.mdx b/docs/docs/get-started/what-is-openrag.mdx index f5b5eef7..7b411617 100644 --- a/docs/docs/get-started/what-is-openrag.mdx +++ b/docs/docs/get-started/what-is-openrag.mdx @@ -3,8 +3,17 @@ title: What is OpenRAG? slug: /what-is-openrag --- -OpenRAG exists at the confluence of three powerful open-source projects: +OpenRAG is an open-source package for building agentic RAG systems. +It supports integration with a wide range of orchestration tools, vector databases, and LLM providers. -* Langflow for agentic workflows -* OpenSearch for vector databases -* Docling for data ingestion from common sources like OneDrive, Google Drive, and AWS \ No newline at end of file +OpenRAG connects and amplifies three popular, proven open-source projects into one powerful platform: + +* [Langflow](https://docs.langflow.org) - Langflow is a powerful tool to build and deploy AI agents and MCP servers. It supports all major LLMs, vector databases and a growing library of AI tools. 
+ +* [OpenSearch](https://docs.opensearch.org/latest/) - OpenSearch is a community-driven, Apache 2.0-licensed open source search and analytics suite that makes it easy to ingest, search, visualize, and analyze data. + +* [Docling](https://docling-project.github.io/docling/) - Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem. + +OpenRAG builds on Langflow's familiar interface while adding OpenSearch for vector storage and Docling for simplified document parsing, with opinionated flows that serve as ready-to-use recipes for ingestion, retrieval, and generation from popular sources like OneDrive, Google Drive, and AWS. And don't fear: every part of the stack is swappable. Write your own custom components in Python, try different language models, and customize your flows to build an agentic RAG system that solves problems. + +Ready to get started? Install OpenRAG and then run the Quickstart to create a powerful RAG pipeline. 
\ No newline at end of file From 499969728b7aaeed7bb28d5acebd96896bd04aaf Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 23 Sep 2025 13:44:38 -0400 Subject: [PATCH 10/19] fix-base-path --- docs/docusaurus.config.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docusaurus.config.js b/docs/docusaurus.config.js index c4175c09..4248c3e2 100644 --- a/docs/docusaurus.config.js +++ b/docs/docusaurus.config.js @@ -71,7 +71,7 @@ const config = { logo: { alt: 'OpenRAG Logo', src: 'img/logo.svg', - href: '/', + href: 'what-is-openrag', }, items: [ { @@ -89,7 +89,7 @@ const config = { items: [ { label: 'Getting Started', - to: '/', + to: 'what-is-openrag', }, ], }, From 70da229e40fb9577f8320abd7841bf66a51c1fc6 Mon Sep 17 00:00:00 2001 From: Brent O'Neill Date: Tue, 23 Sep 2025 11:53:58 -0600 Subject: [PATCH 11/19] commit and make select work --- frontend/src/app/knowledge/chunks/page.tsx | 60 ++++++++++++++++------ 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index b59a8760..9a889dae 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -39,6 +39,9 @@ function ChunksPageContent() { const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< ChunkResult[] >([]); + const [selectedChunks, setSelectedChunks] = useState>(new Set()); + + // Calculate average chunk length const averageChunkLength = useMemo( () => chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / @@ -84,10 +87,34 @@ function ChunksPageContent() { setChunks(fileData?.chunks || []); }, [data, filename, fileData?.chunks]); + // Set selected state for all checkboxes when selectAll changes + useEffect(() => { + if (selectAll) { + setSelectedChunks(new Set(chunks.map((_, index) => index))); + } else { + setSelectedChunks(new Set()); + } + }, [selectAll, 
setSelectedChunks, chunks]); + const handleBack = useCallback(() => { - router.back(); + router.push("/knowledge"); }, [router]); + const handleChunkCardCheckboxChange = useCallback( + (index: number) => { + setSelectedChunks((prevSelected) => { + const newSelected = new Set(prevSelected); + if (newSelected.has(index)) { + newSelected.delete(index); + } else { + newSelected.add(index); + } + return newSelected; + }); + }, + [setSelectedChunks] + ); + if (!filename) { return (
@@ -134,11 +161,13 @@ function ChunksPageContent() { setSelectAll(checked === true)} + onCheckedChange={(handleSelectAll) => + setSelectAll(!!handleSelectAll) + } /> @@ -192,7 +221,10 @@ function ChunksPageContent() {
- + handleChunkCardCheckboxChange(index)} + />
Chunk {chunk.page} @@ -221,11 +253,9 @@ function ChunksPageContent() { Active */}
-
-
- {chunk.text} -
-
+
+ {chunk.text} +
))}
@@ -249,18 +279,17 @@ function ChunksPageContent() { {averageChunkLength.toFixed(0)} chars
-
+ {/* TODO: Uncomment after data is available */} + {/*
Process time
- {/* {averageChunkLength.toFixed(0)} chars */}
Model
- {/* {averageChunkLength.toFixed(0)} chars */}
-
+
*/}
@@ -292,10 +321,11 @@ function ChunksPageContent() { N/A
-
+ {/* TODO: Uncomment after data is available */} + {/*
Source
-
+
*/}
Updated
From a425ba62fbcc34bb3b10281a44ba48edc466fe3e Mon Sep 17 00:00:00 2001 From: boneill-ds Date: Tue, 23 Sep 2025 13:24:25 -0600 Subject: [PATCH 12/19] Update frontend/src/app/knowledge/chunks/page.tsx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- frontend/src/app/knowledge/chunks/page.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index 9a889dae..c55690c5 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -26,6 +26,7 @@ const getFileTypeLabel = (mimetype: string) => { if (mimetype === "application/pdf") return "PDF"; if (mimetype === "text/plain") return "Text"; if (mimetype === "application/msword") return "Word Document"; + return "Unknown"; }; function ChunksPageContent() { From 3cb33526dbf1fd9f28a89e2d8895911c027b4a2d Mon Sep 17 00:00:00 2001 From: boneill-ds Date: Tue, 23 Sep 2025 13:24:41 -0600 Subject: [PATCH 13/19] Update frontend/src/app/knowledge/chunks/page.tsx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- frontend/src/app/knowledge/chunks/page.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index c55690c5..d538ce36 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -62,8 +62,8 @@ function ChunksPageContent() { if (queryInputText === "") { setChunksFilteredByQuery(chunks); } else { - setChunksFilteredByQuery((prevChunks) => - prevChunks.filter((chunk) => + setChunksFilteredByQuery( + chunks.filter((chunk) => chunk.text.toLowerCase().includes(queryInputText.toLowerCase()) ) ); From c33b5bcd4ed265c3e3d57d9b1436fdd2121a1e77 Mon Sep 17 00:00:00 2001 From: boneill-ds Date: Tue, 23 Sep 2025 13:24:49 -0600 Subject: [PATCH 14/19] Update frontend/src/app/knowledge/chunks/page.tsx Co-authored-by: 
Copilot <175728472+Copilot@users.noreply.github.com> --- frontend/src/app/knowledge/chunks/page.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index d538ce36..52b4bcfd 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -86,7 +86,7 @@ function ChunksPageContent() { } setChunks(fileData?.chunks || []); - }, [data, filename, fileData?.chunks]); + }, [data, filename]); // Set selected state for all checkboxes when selectAll changes useEffect(() => { From 9caebae22e03b2c795d85dde3f0326c99280ab7b Mon Sep 17 00:00:00 2001 From: boneill-ds Date: Tue, 23 Sep 2025 13:24:57 -0600 Subject: [PATCH 15/19] Update frontend/src/app/knowledge/chunks/page.tsx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- frontend/src/app/knowledge/chunks/page.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index 52b4bcfd..73a687b1 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -227,7 +227,7 @@ function ChunksPageContent() { onClick={() => handleChunkCardCheckboxChange(index)} />
- + Chunk {chunk.page} From 5a473541ad0ce934af4c879156745d30c608133f Mon Sep 17 00:00:00 2001 From: boneill-ds Date: Tue, 23 Sep 2025 13:25:40 -0600 Subject: [PATCH 16/19] Update frontend/src/app/knowledge/chunks/page.tsx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- frontend/src/app/knowledge/chunks/page.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index 73a687b1..7de98830 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -224,7 +224,7 @@ function ChunksPageContent() {
handleChunkCardCheckboxChange(index)} + onCheckedChange={(checked) => handleChunkCardCheckboxChange(checked, index)} />
From 1808ccc149c3dccc490e630b10d10d55d935bf6a Mon Sep 17 00:00:00 2001 From: Brent O'Neill Date: Tue, 23 Sep 2025 13:59:35 -0600 Subject: [PATCH 17/19] fix selection --- frontend/src/app/knowledge/chunks/page.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index 7de98830..cdc9fcc3 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -224,7 +224,9 @@ function ChunksPageContent() {
handleChunkCardCheckboxChange(checked, index)} + onCheckedChange={() => + handleChunkCardCheckboxChange(index) + } />
From 917b078c20cb810565b114e30cb9af422f8ec8fd Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Tue, 23 Sep 2025 16:01:02 -0500 Subject: [PATCH 18/19] old tweaks cleanup --- frontend/src/app/settings/page.tsx | 2 ++ src/agent.py | 18 +----------------- src/api/settings.py | 16 +--------------- src/services/chat_service.py | 11 ----------- 4 files changed, 4 insertions(+), 43 deletions(-) diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx index 50dc7867..eea555c2 100644 --- a/frontend/src/app/settings/page.tsx +++ b/frontend/src/app/settings/page.tsx @@ -152,6 +152,7 @@ function KnowledgeSourcesPage() { }, }); + // Debounced update function const debouncedUpdate = useDebounce( (variables: Parameters[0]) => { @@ -219,6 +220,7 @@ function KnowledgeSourcesPage() { // Update processing mode const handleProcessingModeChange = (mode: string) => { setProcessingMode(mode); + // Update the configuration setting (backend will also update the flow automatically) debouncedUpdate({ doclingPresets: mode }); }; diff --git a/src/agent.py b/src/agent.py index ab99f597..1eb362bd 100644 --- a/src/agent.py +++ b/src/agent.py @@ -106,7 +106,6 @@ async def async_response_stream( model: str, extra_headers: dict = None, previous_response_id: str = None, - tweaks: dict = None, log_prefix: str = "response", ): logger.info("User prompt received", prompt=prompt) @@ -121,8 +120,6 @@ async def async_response_stream( } if previous_response_id is not None: request_params["previous_response_id"] = previous_response_id - if tweaks: - request_params["tweaks"] = tweaks if "x-api-key" not in client.default_headers: if hasattr(client, "api_key") and extra_headers is not None: @@ -199,7 +196,6 @@ async def async_response( model: str, extra_headers: dict = None, previous_response_id: str = None, - tweaks: dict = None, log_prefix: str = "response", ): try: @@ -214,8 +210,6 @@ async def async_response( } if previous_response_id is not None: 
request_params["previous_response_id"] = previous_response_id - if tweaks: - request_params["tweaks"] = tweaks if extra_headers: request_params["extra_headers"] = extra_headers @@ -249,7 +243,6 @@ async def async_stream( model: str, extra_headers: dict = None, previous_response_id: str = None, - tweaks: dict = None, log_prefix: str = "response", ): async for chunk in async_response_stream( @@ -258,7 +251,6 @@ async def async_stream( model, extra_headers=extra_headers, previous_response_id=previous_response_id, - tweaks=tweaks, log_prefix=log_prefix, ): yield chunk @@ -271,7 +263,6 @@ async def async_langflow( prompt: str, extra_headers: dict = None, previous_response_id: str = None, - tweaks: dict = None, ): response_text, response_id, response_obj = await async_response( langflow_client, @@ -279,7 +270,6 @@ async def async_langflow( flow_id, extra_headers=extra_headers, previous_response_id=previous_response_id, - tweaks=tweaks, log_prefix="langflow", ) return response_text, response_id @@ -292,7 +282,6 @@ async def async_langflow_stream( prompt: str, extra_headers: dict = None, previous_response_id: str = None, - tweaks: dict = None, ): logger.debug("Starting langflow stream", prompt=prompt) try: @@ -302,8 +291,7 @@ async def async_langflow_stream( flow_id, extra_headers=extra_headers, previous_response_id=previous_response_id, - tweaks=tweaks, - log_prefix="langflow", + log_prefix="langflow", ): logger.debug( "Yielding chunk from langflow stream", @@ -463,7 +451,6 @@ async def async_langflow_chat( user_id: str, extra_headers: dict = None, previous_response_id: str = None, - tweaks: dict = None, store_conversation: bool = True, ): logger.debug( @@ -497,7 +484,6 @@ async def async_langflow_chat( flow_id, extra_headers=extra_headers, previous_response_id=previous_response_id, - tweaks=tweaks, log_prefix="langflow", ) logger.debug( @@ -576,7 +562,6 @@ async def async_langflow_chat_stream( user_id: str, extra_headers: dict = None, previous_response_id: str = None, - 
tweaks: dict = None, ): logger.debug( "async_langflow_chat_stream called", @@ -603,7 +588,6 @@ async def async_langflow_chat_stream( flow_id, extra_headers=extra_headers, previous_response_id=previous_response_id, - tweaks=tweaks, log_prefix="langflow", ): # Extract text content to build full response for history diff --git a/src/api/settings.py b/src/api/settings.py index 9723cdeb..560eb400 100644 --- a/src/api/settings.py +++ b/src/api/settings.py @@ -47,22 +47,7 @@ def get_docling_preset_configs(): } -def get_docling_tweaks(docling_preset: str = None) -> dict: - """Get Langflow tweaks for docling component based on preset""" - if not docling_preset: - # Get current preset from config - openrag_config = get_openrag_config() - docling_preset = openrag_config.knowledge.doclingPresets - preset_configs = get_docling_preset_configs() - - if docling_preset not in preset_configs: - docling_preset = "standard" # fallback - - preset_config = preset_configs[docling_preset] - docling_serve_opts = json.dumps(preset_config) - - return {"DoclingRemote-ayRdw": {"docling_serve_opts": docling_serve_opts}} async def get_settings(request, session_manager): @@ -626,3 +611,4 @@ async def update_docling_preset(request, session_manager): {"error": f"Failed to update docling preset: {str(e)}"}, status_code=500 ) + diff --git a/src/services/chat_service.py b/src/services/chat_service.py index 4b3c9d26..5ffe30f9 100644 --- a/src/services/chat_service.py +++ b/src/services/chat_service.py @@ -2,7 +2,6 @@ import json from config.settings import NUDGES_FLOW_ID, clients, LANGFLOW_URL, LANGFLOW_CHAT_FLOW_ID from agent import async_chat, async_langflow, async_chat_stream from auth_context import set_auth_context -from api.settings import get_docling_tweaks from utils.logging_config import get_logger logger = get_logger(__name__) @@ -127,8 +126,6 @@ class ChatService: "Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY." 
) - # Get docling tweaks based on current configuration - docling_tweaks = get_docling_tweaks() if stream: from agent import async_langflow_chat_stream @@ -140,7 +137,6 @@ class ChatService: user_id, extra_headers=extra_headers, previous_response_id=previous_response_id, - tweaks=docling_tweaks, ) else: from agent import async_langflow_chat @@ -152,7 +148,6 @@ class ChatService: user_id, extra_headers=extra_headers, previous_response_id=previous_response_id, - tweaks=docling_tweaks, ) response_data = {"response": response_text} if response_id: @@ -202,8 +197,6 @@ class ChatService: from agent import async_langflow_chat - # Get docling tweaks (might not be used by nudges flow, but keeping consistent) - docling_tweaks = get_docling_tweaks() response_text, response_id = await async_langflow_chat( langflow_client, @@ -211,7 +204,6 @@ class ChatService: prompt, user_id, extra_headers=extra_headers, - tweaks=docling_tweaks, store_conversation=False, ) response_data = {"response": response_text} @@ -242,8 +234,6 @@ class ChatService: raise ValueError( "Langflow client not initialized. Ensure LANGFLOW is reachable or set LANGFLOW_KEY." 
) - # Get docling tweaks based on current configuration - docling_tweaks = get_docling_tweaks() response_text, response_id = await async_langflow( langflow_client=langflow_client, @@ -251,7 +241,6 @@ class ChatService: prompt=document_prompt, extra_headers=extra_headers, previous_response_id=previous_response_id, - tweaks=docling_tweaks, ) else: # chat # Set auth context for chat tools and provide user_id From be8e13a173206a72a55dc0d805def0dd1b34a531 Mon Sep 17 00:00:00 2001 From: Deon Sanchez <69873175+deon-sanchez@users.noreply.github.com> Date: Wed, 24 Sep 2025 07:27:59 -0600 Subject: [PATCH 19/19] feat: add knowledge status (#53) * feat: add status handling and visual indicators for file statuses * refactor: comment out status field and related rendering logic in SearchPage * format * add timeout on mutation delete document * make file fields be optional * fetch task files and display them on knowledge page * add tasks to files inside task context * added failed to status badge * added files on get all tasks on backend * Changed models to get parameters by settings if not existent * changed settings page to get models when is no ajth mode * fixed openai allowing validation even when value is not present * removed unused console log --------- Co-authored-by: Lucas Oliveira Co-authored-by: Mike Fortman --- .../app/api/mutations/useDeleteDocument.ts | 8 +- .../src/app/api/queries/useGetModelsQuery.ts | 2 +- .../src/app/api/queries/useGetSearchQuery.ts | 27 ++-- frontend/src/app/knowledge/page.tsx | 77 +++++++--- frontend/src/app/settings/page.tsx | 110 +++++++++---- .../ui/animated-processing-icon.tsx | 49 ++++++ frontend/src/components/ui/status-badge.tsx | 58 +++++++ frontend/src/contexts/task-context.tsx | 145 ++++++++++++++++-- src/api/models.py | 97 ++++++++++-- src/services/task_service.py | 66 ++++++-- 10 files changed, 529 insertions(+), 110 deletions(-) create mode 100644 frontend/src/components/ui/animated-processing-icon.tsx create mode 100644 
frontend/src/components/ui/status-badge.tsx diff --git a/frontend/src/app/api/mutations/useDeleteDocument.ts b/frontend/src/app/api/mutations/useDeleteDocument.ts index 78985498..47b852b1 100644 --- a/frontend/src/app/api/mutations/useDeleteDocument.ts +++ b/frontend/src/app/api/mutations/useDeleteDocument.ts @@ -14,7 +14,7 @@ interface DeleteDocumentResponse { } const deleteDocument = async ( - data: DeleteDocumentRequest + data: DeleteDocumentRequest, ): Promise => { const response = await fetch("/api/documents/delete-by-filename", { method: "POST", @@ -37,9 +37,11 @@ export const useDeleteDocument = () => { return useMutation({ mutationFn: deleteDocument, - onSuccess: () => { + onSettled: () => { // Invalidate and refetch search queries to update the UI - queryClient.invalidateQueries({ queryKey: ["search"] }); + setTimeout(() => { + queryClient.invalidateQueries({ queryKey: ["search"] }); + }, 1000); }, }); }; diff --git a/frontend/src/app/api/queries/useGetModelsQuery.ts b/frontend/src/app/api/queries/useGetModelsQuery.ts index cd24131b..4ce55bd3 100644 --- a/frontend/src/app/api/queries/useGetModelsQuery.ts +++ b/frontend/src/app/api/queries/useGetModelsQuery.ts @@ -54,7 +54,7 @@ export const useGetOpenAIModelsQuery = ( queryKey: ["models", "openai", params], queryFn: getOpenAIModels, retry: 2, - enabled: options?.enabled !== false, // Allow enabling/disabling from options + enabled: !!params?.apiKey, staleTime: 0, // Always fetch fresh data gcTime: 0, // Don't cache results ...options, diff --git a/frontend/src/app/api/queries/useGetSearchQuery.ts b/frontend/src/app/api/queries/useGetSearchQuery.ts index 9928af3d..37798ce5 100644 --- a/frontend/src/app/api/queries/useGetSearchQuery.ts +++ b/frontend/src/app/api/queries/useGetSearchQuery.ts @@ -34,21 +34,28 @@ export interface ChunkResult { export interface File { filename: string; mimetype: string; - chunkCount: number; - avgScore: number; + chunkCount?: number; + avgScore?: number; source_url: string; - 
owner: string; - owner_name: string; - owner_email: string; + owner?: string; + owner_name?: string; + owner_email?: string; size: number; connector_type: string; - chunks: ChunkResult[]; + status?: + | "processing" + | "active" + | "unavailable" + | "failed" + | "hidden" + | "sync"; + chunks?: ChunkResult[]; } export const useGetSearchQuery = ( query: string, queryData?: ParsedQueryData | null, - options?: Omit + options?: Omit, ) => { const queryClient = useQueryClient(); @@ -149,7 +156,7 @@ export const useGetSearchQuery = ( } }); - const files: File[] = Array.from(fileMap.values()).map(file => ({ + const files: File[] = Array.from(fileMap.values()).map((file) => ({ filename: file.filename, mimetype: file.mimetype, chunkCount: file.chunks.length, @@ -173,11 +180,11 @@ export const useGetSearchQuery = ( const queryResult = useQuery( { queryKey: ["search", effectiveQuery], - placeholderData: prev => prev, + placeholderData: (prev) => prev, queryFn: getFiles, ...options, }, - queryClient + queryClient, ); return queryResult; diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx index ee116a71..5155f4e2 100644 --- a/frontend/src/app/knowledge/page.tsx +++ b/frontend/src/app/knowledge/page.tsx @@ -1,16 +1,10 @@ "use client"; -import { - Building2, - Cloud, - HardDrive, - Search, - Trash2, - X, -} from "lucide-react"; -import { AgGridReact, CustomCellRendererProps } from "ag-grid-react"; -import { useCallback, useState, useRef, ChangeEvent } from "react"; +import type { ColDef } from "ag-grid-community"; +import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react"; +import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react"; import { useRouter } from "next/navigation"; +import { type ChangeEvent, useCallback, useRef, useState } from "react"; import { SiGoogledrive } from "react-icons/si"; import { TbBrandOnedrive } from "react-icons/tb"; import { KnowledgeDropdown } from 
"@/components/knowledge-dropdown"; @@ -19,13 +13,13 @@ import { Button } from "@/components/ui/button"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useTask } from "@/contexts/task-context"; import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery"; -import { ColDef } from "ag-grid-community"; import "@/components/AgGrid/registerAgGridModules"; import "@/components/AgGrid/agGridStyles.css"; +import { toast } from "sonner"; import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown"; +import { StatusBadge } from "@/components/ui/status-badge"; import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog"; import { useDeleteDocument } from "../api/mutations/useDeleteDocument"; -import { toast } from "sonner"; // Function to get the appropriate icon for a connector type function getSourceIcon(connectorType?: string) { @@ -51,7 +45,7 @@ function getSourceIcon(connectorType?: string) { function SearchPage() { const router = useRouter(); - const { isMenuOpen } = useTask(); + const { isMenuOpen, files: taskFiles } = useTask(); const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = useKnowledgeFilter(); const [selectedRows, setSelectedRows] = useState([]); @@ -61,14 +55,38 @@ function SearchPage() { const { data = [], isFetching } = useGetSearchQuery( parsedFilterData?.query || "*", - parsedFilterData + parsedFilterData, ); const handleTableSearch = (e: ChangeEvent) => { gridRef.current?.api.setGridOption("quickFilterText", e.target.value); }; - const fileResults = data as File[]; + // Convert TaskFiles to File format and merge with backend results + const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { + return { + filename: taskFile.filename, + mimetype: taskFile.mimetype, + source_url: taskFile.source_url, + size: taskFile.size, + connector_type: taskFile.connector_type, + status: taskFile.status, + }; + }); + + const backendFiles = data 
as File[]; + + const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { + return ( + taskFile.status !== "active" && + !backendFiles.some( + (backendFile) => backendFile.filename === taskFile.filename, + ) + ); + }); + + // Combine backend files first, then task files + const fileResults = [...backendFiles, ...filteredTaskFiles]; + + const gridRef = useRef(null); @@ -82,13 +100,14 @@ function SearchPage() { minWidth: 220, cellRenderer: ({ data, value }: CustomCellRendererProps) => { return ( -
{ router.push( `/knowledge/chunks?filename=${encodeURIComponent( - data?.filename ?? "" - )}` + data?.filename ?? "", + )}`, ); }} > @@ -96,7 +115,7 @@ function SearchPage() { {value} -
+ ); }, }, @@ -119,6 +138,7 @@ function SearchPage() { { field: "chunkCount", headerName: "Chunks", + valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", }, { field: "avgScore", @@ -127,11 +147,20 @@ function SearchPage() { cellRenderer: ({ value }: CustomCellRendererProps) => { return ( - {value.toFixed(2)} + {value?.toFixed(2) ?? "-"} ); }, }, + { + field: "status", + headerName: "Status", + cellRenderer: ({ data }: CustomCellRendererProps) => { + // Default to 'active' status if no status is provided + const status = data?.status || "active"; + return ; + }, + }, { cellRenderer: ({ data }: CustomCellRendererProps) => { return ; @@ -172,7 +201,7 @@ function SearchPage() { try { // Delete each file individually since the API expects one filename at a time const deletePromises = selectedRows.map((row) => - deleteDocumentMutation.mutateAsync({ filename: row.filename }) + deleteDocumentMutation.mutateAsync({ filename: row.filename }), ); await Promise.all(deletePromises); @@ -180,7 +209,7 @@ function SearchPage() { toast.success( `Successfully deleted ${selectedRows.length} document${ selectedRows.length > 1 ? "s" : "" - }` + }`, ); setSelectedRows([]); setShowBulkDeleteDialog(false); @@ -193,7 +222,7 @@ function SearchPage() { toast.error( error instanceof Error ? 
error.message - : "Failed to delete some documents" + : "Failed to delete some documents", ); } }; diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx index eea555c2..f49ff393 100644 --- a/frontend/src/app/settings/page.tsx +++ b/frontend/src/app/settings/page.tsx @@ -4,11 +4,13 @@ import { Loader2, PlugZap, RefreshCw } from "lucide-react"; import { useSearchParams } from "next/navigation"; import { Suspense, useCallback, useEffect, useState } from "react"; import { useUpdateFlowSettingMutation } from "@/app/api/mutations/useUpdateFlowSettingMutation"; +import { + useGetIBMModelsQuery, + useGetOllamaModelsQuery, + useGetOpenAIModelsQuery, +} from "@/app/api/queries/useGetModelsQuery"; import { useGetSettingsQuery } from "@/app/api/queries/useGetSettingsQuery"; -import { useGetOpenAIModelsQuery, useGetOllamaModelsQuery, useGetIBMModelsQuery } from "@/app/api/queries/useGetModelsQuery"; import { ConfirmationDialog } from "@/components/confirmation-dialog"; -import { ModelSelectItems } from "./helpers/model-select-item"; -import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers"; import { ProtectedRoute } from "@/components/protected-route"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; @@ -33,6 +35,8 @@ import { Textarea } from "@/components/ui/textarea"; import { useAuth } from "@/contexts/auth-context"; import { useTask } from "@/contexts/task-context"; import { useDebounce } from "@/lib/debounce"; +import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers"; +import { ModelSelectItems } from "./helpers/model-select-item"; const MAX_SYSTEM_PROMPT_CHARS = 2000; @@ -105,42 +109,46 @@ function KnowledgeSourcesPage() { // Fetch settings using React Query const { data: settings = {} } = useGetSettingsQuery({ - enabled: isAuthenticated, + enabled: isAuthenticated || isNoAuthMode, }); // Get the current provider from settings - const currentProvider 
= (settings.provider?.model_provider || 'openai') as ModelProvider; + const currentProvider = (settings.provider?.model_provider || + "openai") as ModelProvider; // Fetch available models based on provider const { data: openaiModelsData } = useGetOpenAIModelsQuery( undefined, // Let backend use stored API key from configuration { - enabled: isAuthenticated && currentProvider === 'openai', - } + enabled: + (isAuthenticated || isNoAuthMode) && currentProvider === "openai", + }, ); const { data: ollamaModelsData } = useGetOllamaModelsQuery( undefined, // No params for now, could be extended later { - enabled: isAuthenticated && currentProvider === 'ollama', - } + enabled: + (isAuthenticated || isNoAuthMode) && currentProvider === "ollama", + }, ); const { data: ibmModelsData } = useGetIBMModelsQuery( undefined, // No params for now, could be extended later { - enabled: isAuthenticated && currentProvider === 'ibm', - } + enabled: (isAuthenticated || isNoAuthMode) && currentProvider === "ibm", + }, ); // Select the appropriate models data based on provider - const modelsData = currentProvider === 'openai' - ? openaiModelsData - : currentProvider === 'ollama' - ? ollamaModelsData - : currentProvider === 'ibm' - ? ibmModelsData - : openaiModelsData; // fallback to openai + const modelsData = + currentProvider === "openai" + ? openaiModelsData + : currentProvider === "ollama" + ? ollamaModelsData + : currentProvider === "ibm" + ? 
ibmModelsData + : openaiModelsData; // fallback to openai // Mutations const updateFlowSettingMutation = useUpdateFlowSettingMutation({ @@ -152,7 +160,6 @@ function KnowledgeSourcesPage() { }, }); - // Debounced update function const debouncedUpdate = useDebounce( (variables: Parameters[0]) => { @@ -224,7 +231,6 @@ function KnowledgeSourcesPage() { debouncedUpdate({ doclingPresets: mode }); }; - // Helper function to get connector icon const getConnectorIcon = useCallback((iconName: string) => { const iconMap: { [key: string]: React.ReactElement } = { @@ -613,7 +619,11 @@ function KnowledgeSourcesPage() { Language Model m.default)?.value || "text-embedding-ada-002" + settings.knowledge?.embedding_model || + modelsData?.embedding_models?.find((m) => m.default)?.value || + "text-embedding-ada-002" } onValueChange={handleEmbeddingModelChange} > @@ -746,7 +771,9 @@ function KnowledgeSourcesPage() { @@ -807,7 +834,10 @@ function KnowledgeSourcesPage() {
-