From f6bb375860fc821af56ac278939e5d34e5c89300 Mon Sep 17 00:00:00 2001 From: Mendon Kissling <59585235+mendonk@users.noreply.github.com> Date: Tue, 30 Sep 2025 09:51:42 -0400 Subject: [PATCH 01/26] init --- docs/docs/core-components/ingestion.mdx | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 docs/docs/core-components/ingestion.mdx diff --git a/docs/docs/core-components/ingestion.mdx b/docs/docs/core-components/ingestion.mdx new file mode 100644 index 00000000..d240d53e --- /dev/null +++ b/docs/docs/core-components/ingestion.mdx @@ -0,0 +1,23 @@ +--- +title: Docling Ingestion +slug: /ingestion +--- + +import Icon from "@site/src/components/icon/icon"; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx'; + +OpenRAG uses [Docling](https://docling-project.github.io/docling/) for its document ingestion pipeline. +More specifically, OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), which starts a `docling-serve` process on your local machine and runs Docling ingestion through an API service. + +OpenRAG chose Docling for its support for a wide variety of file formats, high performance, and advanced understanding of tables and images. + +## Docling ingestion settings + +These settings control the Docling ingestion parameters. + +OpenRAG will warn you if `docling-serve` is not running. +To start or stop `docling-serve` or any other native services, in the TUI main menu, click **Start Native Services** or **Stop Native Services**. + +## Use OpenRAG default ingestion instead of Docling \ No newline at end of file From bc055319affb26f10f34256232ad04c722d01dc5 Mon Sep 17 00:00:00 2001 From: Deon Sanchez <69873175+deon-sanchez@users.noreply.github.com> Date: Tue, 30 Sep 2025 09:45:39 -0600 Subject: [PATCH 02/26] fixed empty state --- frontend/src/app/knowledge/page.tsx | 47 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx index 5155f4e2..dbd7ac2f 100644 --- a/frontend/src/app/knowledge/page.tsx +++ b/frontend/src/app/knowledge/page.tsx @@ -55,7 +55,7 @@ function SearchPage() { const { data = [], isFetching } = useGetSearchQuery( parsedFilterData?.query || "*", - parsedFilterData, + parsedFilterData ); const handleTableSearch = (e: ChangeEvent) => { @@ -63,7 +63,7 @@ function SearchPage() { }; // Convert TaskFiles to File format and merge with backend results - const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { + const taskFilesAsFiles: File[] = taskFiles.map(taskFile => { return { filename: taskFile.filename, mimetype: taskFile.mimetype, @@ -76,11 +76,11 @@ function SearchPage() { const backendFiles = data as File[]; - const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { + const filteredTaskFiles = taskFilesAsFiles.filter(taskFile => { return ( taskFile.status !== "active" && !backendFiles.some( - (backendFile) => backendFile.filename === taskFile.filename, + backendFile => backendFile.filename === taskFile.filename ) ); }); @@ -106,8 +106,8 @@ function SearchPage() { onClick={() => { router.push( `/knowledge/chunks?filename=${encodeURIComponent( - data?.filename ?? "", - )}`, + data?.filename ?? "" + )}` ); }} > @@ -122,7 +122,7 @@ function SearchPage() { { field: "size", headerName: "Size", - valueFormatter: (params) => + valueFormatter: params => params.value ? `${Math.round(params.value / 1024)} KB` : "-", }, { @@ -132,13 +132,13 @@ function SearchPage() { { field: "owner", headerName: "Owner", - valueFormatter: (params) => + valueFormatter: params => params.data?.owner_name || params.data?.owner_email || "—", }, { field: "chunkCount", headerName: "Chunks", - valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", + valueFormatter: params => params.data?.chunkCount?.toString() || "-", }, { field: "avgScore", @@ -200,8 +200,8 @@ function SearchPage() { try { // Delete each file individually since the API expects one filename at a time - const deletePromises = selectedRows.map((row) => - deleteDocumentMutation.mutateAsync({ filename: row.filename }), + const deletePromises = selectedRows.map(row => + deleteDocumentMutation.mutateAsync({ filename: row.filename }) ); await Promise.all(deletePromises); @@ -209,7 +209,7 @@ function SearchPage() { toast.success( `Successfully deleted ${selectedRows.length} document${ selectedRows.length > 1 ? "s" : "" - }`, + }` ); setSelectedRows([]); setShowBulkDeleteDialog(false); @@ -222,7 +222,7 @@ function SearchPage() { toast.error( error instanceof Error ? error.message - : "Failed to delete some documents", + : "Failed to delete some documents" ); } }; @@ -313,18 +313,17 @@ function SearchPage() { rowSelection="multiple" rowMultiSelectWithClick={false} suppressRowClickSelection={true} - getRowId={(params) => params.data.filename} - domLayout="autoHeight" + getRowId={params => params.data.filename} + domLayout="normal" onSelectionChanged={onSelectionChanged} noRowsOverlayComponent={() => ( -
- -

- No documents found -

-

- Try adjusting your search terms -

+
+
+ No knowledge +
+
+ Add files from local or your preferred cloud. +
)} /> @@ -342,7 +341,7 @@ function SearchPage() { }? This will remove all chunks and data associated with these documents. This action cannot be undone. Documents to be deleted: -${selectedRows.map((row) => `• ${row.filename}`).join("\n")}`} +${selectedRows.map(row => `• ${row.filename}`).join("\n")}`} confirmText="Delete All" onConfirm={handleBulkDelete} isLoading={deleteDocumentMutation.isPending} From 0a0dbe15ef5c91a2f5878570d5c64859d999076e Mon Sep 17 00:00:00 2001 From: Deon Sanchez <69873175+deon-sanchez@users.noreply.github.com> Date: Tue, 30 Sep 2025 10:56:43 -0600 Subject: [PATCH 03/26] Enhance settings page with new routing and UI updates; update connector descriptions for clarity --- frontend/src/app/settings/page.tsx | 321 +++++++++++------------ src/connectors/google_drive/connector.py | 2 +- src/connectors/onedrive/connector.py | 2 +- src/connectors/sharepoint/connector.py | 2 +- 4 files changed, 162 insertions(+), 165 deletions(-) diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx index a63d91d3..72f36d91 100644 --- a/frontend/src/app/settings/page.tsx +++ b/frontend/src/app/settings/page.tsx @@ -1,7 +1,7 @@ "use client"; -import { ArrowUpRight, Loader2, PlugZap, RefreshCw } from "lucide-react"; -import { useSearchParams } from "next/navigation"; +import { ArrowUpRight, Loader2, PlugZap, Plus, RefreshCw } from "lucide-react"; +import { useRouter, useSearchParams } from "next/navigation"; import { Suspense, useCallback, useEffect, useState } from "react"; import { useUpdateFlowSettingMutation } from "@/app/api/mutations/useUpdateFlowSettingMutation"; import { @@ -35,15 +35,14 @@ import { Textarea } from "@/components/ui/textarea"; import { useAuth } from "@/contexts/auth-context"; import { useTask } from "@/contexts/task-context"; import { useDebounce } from "@/lib/debounce"; -import { DEFAULT_AGENT_SETTINGS, DEFAULT_KNOWLEDGE_SETTINGS, UI_CONSTANTS } from "@/lib/constants"; +import { + DEFAULT_AGENT_SETTINGS, + DEFAULT_KNOWLEDGE_SETTINGS, + UI_CONSTANTS, +} from "@/lib/constants"; import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers"; import { ModelSelectItems } from "./helpers/model-select-item"; import { LabelWrapper } from "@/components/label-wrapper"; -import { - Tooltip, - TooltipContent, - TooltipTrigger, -} from "@radix-ui/react-tooltip"; const { MAX_SYSTEM_PROMPT_CHARS } = UI_CONSTANTS; @@ -97,6 +96,7 @@ function KnowledgeSourcesPage() { const { isAuthenticated, isNoAuthMode } = useAuth(); const { addTask, tasks } = useTask(); const searchParams = useSearchParams(); + const router = useRouter(); // Connectors state const [connectors, setConnectors] = useState([]); @@ -162,7 +162,7 @@ function KnowledgeSourcesPage() { onSuccess: () => { console.log("Setting updated successfully"); }, - onError: (error) => { + onError: error => { console.error("Failed to update setting:", error.message); }, }); @@ -280,8 +280,8 @@ function KnowledgeSourcesPage() { // Initialize connectors list with metadata from backend const initialConnectors = connectorTypes - .filter((type) => connectorsResult.connectors[type].available) // Only show available connectors - .map((type) => ({ + .filter(type => connectorsResult.connectors[type].available) // Only show available connectors + .map(type => ({ id: type, name: connectorsResult.connectors[type].name, description: connectorsResult.connectors[type].description, @@ -304,8 +304,8 @@ function KnowledgeSourcesPage() { ); const isConnected = activeConnection !== undefined; - setConnectors((prev) => - prev.map((c) => + setConnectors(prev => + prev.map(c => c.type === connectorType ? { ...c, @@ -324,7 +324,7 @@ function KnowledgeSourcesPage() { const handleConnect = async (connector: Connector) => { setIsConnecting(connector.id); - setSyncResults((prev) => ({ ...prev, [connector.id]: null })); + setSyncResults(prev => ({ ...prev, [connector.id]: null })); try { // Use the shared auth callback URL, same as connectors page @@ -374,58 +374,58 @@ function KnowledgeSourcesPage() { } }; - const handleSync = async (connector: Connector) => { - if (!connector.connectionId) return; + // const handleSync = async (connector: Connector) => { + // if (!connector.connectionId) return; - setIsSyncing(connector.id); - setSyncResults((prev) => ({ ...prev, [connector.id]: null })); + // setIsSyncing(connector.id); + // setSyncResults(prev => ({ ...prev, [connector.id]: null })); - try { - const syncBody: { - connection_id: string; - max_files?: number; - selected_files?: string[]; - } = { - connection_id: connector.connectionId, - max_files: syncAllFiles ? 0 : maxFiles || undefined, - }; + // try { + // const syncBody: { + // connection_id: string; + // max_files?: number; + // selected_files?: string[]; + // } = { + // connection_id: connector.connectionId, + // max_files: syncAllFiles ? 0 : maxFiles || undefined, + // }; - // Note: File selection is now handled via the cloud connectors dialog + // // Note: File selection is now handled via the cloud connectors dialog - const response = await fetch(`/api/connectors/${connector.type}/sync`, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify(syncBody), - }); + // const response = await fetch(`/api/connectors/${connector.type}/sync`, { + // method: "POST", + // headers: { + // "Content-Type": "application/json", + // }, + // body: JSON.stringify(syncBody), + // }); - const result = await response.json(); + // const result = await response.json(); - if (response.status === 201) { - const taskId = result.task_id; - if (taskId) { - addTask(taskId); - setSyncResults((prev) => ({ - ...prev, - [connector.id]: { - processed: 0, - total: result.total_files || 0, - }, - })); - } - } else if (response.ok) { - setSyncResults((prev) => ({ ...prev, [connector.id]: result })); - // Note: Stats will auto-refresh via task completion watcher for async syncs - } else { - console.error("Sync failed:", result.error); - } - } catch (error) { - console.error("Sync error:", error); - } finally { - setIsSyncing(null); - } - }; + // if (response.status === 201) { + // const taskId = result.task_id; + // if (taskId) { + // addTask(taskId); + // setSyncResults(prev => ({ + // ...prev, + // [connector.id]: { + // processed: 0, + // total: result.total_files || 0, + // }, + // })); + // } + // } else if (response.ok) { + // setSyncResults(prev => ({ ...prev, [connector.id]: result })); + // // Note: Stats will auto-refresh via task completion watcher for async syncs + // } else { + // console.error("Sync failed:", result.error); + // } + // } catch (error) { + // console.error("Sync error:", error); + // } finally { + // setIsSyncing(null); + // } + // }; const getStatusBadge = (status: Connector["status"]) => { switch (status) { @@ -461,6 +461,11 @@ function KnowledgeSourcesPage() { } }; + const navigateToKnowledgePage = (connector: Connector) => { + const provider = connector.type.replace(/-/g, "_"); + router.push(`/upload/${provider}`); + }; + // Check connector status on mount and when returning from OAuth useEffect(() => { if (isAuthenticated) { @@ -480,9 +485,9 @@ function KnowledgeSourcesPage() { // Watch for task completions and refresh stats useEffect(() => { // Find newly completed tasks by comparing with previous state - const newlyCompletedTasks = tasks.filter((task) => { + const newlyCompletedTasks = tasks.filter(task => { const wasCompleted = - prevTasks.find((prev) => prev.task_id === task.task_id)?.status === + prevTasks.find(prev => prev.task_id === task.task_id)?.status === "completed"; return task.status === "completed" && !wasCompleted; }); @@ -536,7 +541,7 @@ function KnowledgeSourcesPage() { fetch(`/api/reset-flow/retrieval`, { method: "POST", }) - .then((response) => { + .then(response => { if (response.ok) { return response.json(); } @@ -549,7 +554,7 @@ function KnowledgeSourcesPage() { handleModelChange(DEFAULT_AGENT_SETTINGS.llm_model); closeDialog(); // Close after successful completion }) - .catch((error) => { + .catch(error => { console.error("Error restoring retrieval flow:", error); closeDialog(); // Close even on error (could show error toast instead) }); @@ -559,7 +564,7 @@ function KnowledgeSourcesPage() { fetch(`/api/reset-flow/ingest`, { method: "POST", }) - .then((response) => { + .then(response => { if (response.ok) { return response.json(); } @@ -572,7 +577,7 @@ function KnowledgeSourcesPage() { setProcessingMode(DEFAULT_KNOWLEDGE_SETTINGS.processing_mode); closeDialog(); // Close after successful completion }) - .catch((error) => { + .catch(error => { console.error("Error restoring ingest flow:", error); closeDialog(); // Close even on error (could show error toast instead) }); @@ -589,85 +594,88 @@ function KnowledgeSourcesPage() {
{/* Conditional Sync Settings or No-Auth Message */} - {isNoAuthMode ? ( - - - - Cloud connectors are only available with auth mode enabled - - - Please provide the following environment variables and restart: - - - -
-
- # make here https://console.cloud.google.com/apis/credentials + { + isNoAuthMode ? ( + + + + Cloud connectors are only available with auth mode enabled + + + Please provide the following environment variables and + restart: + + + +
+
+ # make here + https://console.cloud.google.com/apis/credentials +
+
GOOGLE_OAUTH_CLIENT_ID=
+
GOOGLE_OAUTH_CLIENT_SECRET=
-
GOOGLE_OAUTH_CLIENT_ID=
-
GOOGLE_OAUTH_CLIENT_SECRET=
-
- - - ) : ( -
-
-

Sync Settings

-

- Configure how many files to sync when manually triggering a sync -

-
-
-
- { - setSyncAllFiles(!!checked); - if (checked) { - setMaxFiles(0); - } else { - setMaxFiles(10); - } - }} - /> - -
- -
- setMaxFiles(parseInt(e.target.value) || 10)} - disabled={syncAllFiles} - className="w-16 min-w-16 max-w-16 flex-shrink-0 disabled:opacity-50 disabled:cursor-not-allowed" - min="1" - max="100" - title={ - syncAllFiles - ? "Disabled when 'Sync all files' is checked" - : "Leave blank or set to 0 for unlimited" - } - /> -
-
-
- )} + + + ) : null + //
+ //
+ //

Sync Settings

+ //

+ // Configure how many files to sync when manually triggering a sync + //

+ //
+ //
+ //
+ // { + // setSyncAllFiles(!!checked); + // if (checked) { + // setMaxFiles(0); + // } else { + // setMaxFiles(10); + // } + // }} + // /> + // + //
+ // + //
+ // setMaxFiles(parseInt(e.target.value) || 10)} + // disabled={syncAllFiles} + // className="w-16 min-w-16 max-w-16 flex-shrink-0 disabled:opacity-50 disabled:cursor-not-allowed" + // min="1" + // max="100" + // title={ + // syncAllFiles + // ? "Disabled when 'Sync all files' is checked" + // : "Leave blank or set to 0 for unlimited" + // } + // /> + //
+ //
+ //
+ } {/* Connectors Grid */}
- {connectors.map((connector) => ( + {connectors.map(connector => (
@@ -689,22 +697,13 @@ function KnowledgeSourcesPage() { {connector.status === "connected" ? (
{syncResults[connector.id] && ( @@ -810,7 +809,7 @@ function KnowledgeSourcesPage() { } confirmText="Proceed" confirmIcon={} - onConfirm={(closeDialog) => + onConfirm={closeDialog => handleEditInLangflow("chat", closeDialog) } variant="warning" @@ -830,8 +829,7 @@ function KnowledgeSourcesPage() {