diff --git a/.gitignore b/.gitignore
index 9c99e617..484db58d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,5 @@
 wheels/
 .DS_Store
 config/
+
+.docling.pid
diff --git a/docker-compose-cpu.yml b/docker-compose-cpu.yml
index 570bc3b8..0c09254a 100644
--- a/docker-compose-cpu.yml
+++ b/docker-compose-cpu.yml
@@ -43,7 +43,7 @@ services:
     # build:
     #   context: .
     #   dockerfile: Dockerfile.backend
-    # container_name: openrag-backend
+    container_name: openrag-backend
     depends_on:
       - langflow
     environment:
diff --git a/docker-compose.yml b/docker-compose.yml
index b97f7cca..be9bcbc9 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -43,7 +43,7 @@ services:
     # build:
     #   context: .
     #   dockerfile: Dockerfile.backend
-    # container_name: openrag-backend
+    container_name: openrag-backend
    depends_on:
       - langflow
     environment:
diff --git a/frontend/components/duplicate-handling-dialog.tsx b/frontend/components/duplicate-handling-dialog.tsx
new file mode 100644
index 00000000..d5cb2edf
--- /dev/null
+++ b/frontend/components/duplicate-handling-dialog.tsx
@@ -0,0 +1,66 @@
+"use client";
+
+import { RotateCcw } from "lucide-react";
+import type React from "react";
+import { Button } from "./ui/button";
+import {
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogFooter,
+  DialogHeader,
+  DialogTitle,
+} from "./ui/dialog";
+
+interface DuplicateHandlingDialogProps {
+  open: boolean;
+  onOpenChange: (open: boolean) => void;
+  onOverwrite: () => void | Promise<void>;
+  isLoading?: boolean;
+}
+
+export const DuplicateHandlingDialog: React.FC<
+  DuplicateHandlingDialogProps
+> = ({ open, onOpenChange, onOverwrite, isLoading = false }) => {
+  const handleOverwrite = async () => {
+    await onOverwrite();
+    onOpenChange(false);
+  };
+
+  return (
+    <Dialog open={open} onOpenChange={onOpenChange}>
+      <DialogContent>
+        <DialogHeader>
+          <DialogTitle>Overwrite document</DialogTitle>
+          <DialogDescription>
+            Overwriting will replace the existing document with another
+            version. This can't be undone.
+          </DialogDescription>
+        </DialogHeader>
+        <DialogFooter>
+          <Button
+            variant="outline"
+            disabled={isLoading}
+            onClick={() => onOpenChange(false)}
+          >
+            Cancel
+          </Button>
+          <Button disabled={isLoading} onClick={handleOverwrite}>
+            <RotateCcw className="h-4 w-4" />
+            Overwrite
+          </Button>
+        </DialogFooter>
+      </DialogContent>
+    </Dialog>
+  );
+};
diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx
index ee49fc3a..7fe84259 100644
--- a/frontend/components/knowledge-dropdown.tsx
+++ b/frontend/components/knowledge-dropdown.tsx
@@ -1,625 +1,696 @@
 "use client";
 
+import { useQueryClient } from "@tanstack/react-query";
 import {
-	ChevronDown,
-	Cloud,
-	FolderOpen,
-	Loader2,
-	PlugZap,
-	Plus,
-	Upload,
+  ChevronDown,
+  Cloud,
+  FolderOpen,
+  Loader2,
+  PlugZap,
+  Plus,
+  Upload,
 } from "lucide-react";
 import { useRouter } from "next/navigation";
 import { useEffect, useRef, useState } from "react";
 import { toast } from "sonner";
+import { useGetTasksQuery } from "@/app/api/queries/useGetTasksQuery";
+import { DuplicateHandlingDialog } from "@/components/duplicate-handling-dialog";
 import { Button } from "@/components/ui/button";
 import {
-	Dialog,
-	DialogContent,
-	DialogDescription,
-	DialogHeader,
-	DialogTitle,
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogHeader,
+  DialogTitle,
 } from "@/components/ui/dialog";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
 import { useTask } from "@/contexts/task-context";
 import { cn } from "@/lib/utils";
+import type { File as SearchFile } from "@/src/app/api/queries/useGetSearchQuery";
 
 interface KnowledgeDropdownProps {
-	active?: boolean;
-	variant?: "navigation" | "button";
+  active?: boolean;
+  variant?: "navigation" | "button";
 }
 
 export function KnowledgeDropdown({
-	active,
-	variant = "navigation",
+  active,
+  variant = "navigation",
 }: KnowledgeDropdownProps) {
-	const { addTask } = useTask();
-	const router = useRouter();
-	const [isOpen, setIsOpen] = useState(false);
-	const [showFolderDialog, setShowFolderDialog] = useState(false);
-	const [showS3Dialog, setShowS3Dialog] = useState(false);
-	const [awsEnabled, setAwsEnabled] = useState(false);
-	const [folderPath, setFolderPath] = useState("/app/documents/");
-	const [bucketUrl, setBucketUrl] = useState("s3://");
-	const [folderLoading, setFolderLoading] = useState(false);
-	const [s3Loading, setS3Loading] = useState(false);
-	const [fileUploading, setFileUploading] = useState(false);
-	const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false);
-	const [cloudConnectors, setCloudConnectors] = useState<{
-		[key: string]: {
-			name: string;
-			available: boolean;
-			connected: boolean;
-			hasToken: boolean;
-		};
-	}>({});
-	const fileInputRef = useRef(null);
-	const dropdownRef = useRef(null);
+  const { addTask } = useTask();
+  const { refetch: refetchTasks } = useGetTasksQuery();
+  const queryClient = useQueryClient();
+  const router = useRouter();
+  const [isOpen, setIsOpen] = useState(false);
+  const [showFolderDialog, setShowFolderDialog] = useState(false);
+  const [showS3Dialog, setShowS3Dialog] = useState(false);
+  const [showDuplicateDialog, setShowDuplicateDialog] = useState(false);
+  const [awsEnabled, setAwsEnabled] = useState(false);
+  const [folderPath, setFolderPath] = useState("/app/documents/");
+  const [bucketUrl, setBucketUrl] = useState("s3://");
+  const [folderLoading, setFolderLoading] = useState(false);
+  const [s3Loading, setS3Loading] = useState(false);
+  const [fileUploading, setFileUploading] = useState(false);
+  const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false);
+  const [pendingFile, setPendingFile] = useState<File | null>(null);
+  const [duplicateFilename, setDuplicateFilename] = useState("");
+  const [cloudConnectors, setCloudConnectors] = useState<{
+    [key: 
string]: { + name: string; + available: boolean; + connected: boolean; + hasToken: boolean; + }; + }>({}); + const fileInputRef = useRef(null); + const dropdownRef = useRef(null); - // Check AWS availability and cloud connectors on mount - useEffect(() => { - const checkAvailability = async () => { - try { - // Check AWS - const awsRes = await fetch("/api/upload_options"); - if (awsRes.ok) { - const awsData = await awsRes.json(); - setAwsEnabled(Boolean(awsData.aws)); - } + // Check AWS availability and cloud connectors on mount + useEffect(() => { + const checkAvailability = async () => { + try { + // Check AWS + const awsRes = await fetch("/api/upload_options"); + if (awsRes.ok) { + const awsData = await awsRes.json(); + setAwsEnabled(Boolean(awsData.aws)); + } - // Check cloud connectors - const connectorsRes = await fetch("/api/connectors"); - if (connectorsRes.ok) { - const connectorsResult = await connectorsRes.json(); - const cloudConnectorTypes = [ - "google_drive", - "onedrive", - "sharepoint", - ]; - const connectorInfo: { - [key: string]: { - name: string; - available: boolean; - connected: boolean; - hasToken: boolean; - }; - } = {}; + // Check cloud connectors + const connectorsRes = await fetch("/api/connectors"); + if (connectorsRes.ok) { + const connectorsResult = await connectorsRes.json(); + const cloudConnectorTypes = [ + "google_drive", + "onedrive", + "sharepoint", + ]; + const connectorInfo: { + [key: string]: { + name: string; + available: boolean; + connected: boolean; + hasToken: boolean; + }; + } = {}; - for (const type of cloudConnectorTypes) { - if (connectorsResult.connectors[type]) { - connectorInfo[type] = { - name: connectorsResult.connectors[type].name, - available: connectorsResult.connectors[type].available, - connected: false, - hasToken: false, - }; + for (const type of cloudConnectorTypes) { + if (connectorsResult.connectors[type]) { + connectorInfo[type] = { + name: connectorsResult.connectors[type].name, + available: connectorsResult.connectors[type].available, + connected: false, + hasToken: false, + }; - // Check connection status - try { - const statusRes = await fetch(`/api/connectors/${type}/status`); - if (statusRes.ok) { - const statusData = await statusRes.json(); - const connections = statusData.connections || []; - const activeConnection = connections.find( - (conn: { is_active: boolean; connection_id: string }) => - conn.is_active - ); - const isConnected = activeConnection !== undefined; + // Check connection status + try { + const statusRes = await fetch(`/api/connectors/${type}/status`); + if (statusRes.ok) { + const statusData = await statusRes.json(); + const connections = statusData.connections || []; + const activeConnection = connections.find( + (conn: { is_active: boolean; connection_id: string }) => + conn.is_active, + ); + const isConnected = activeConnection !== undefined; - if (isConnected && activeConnection) { - connectorInfo[type].connected = true; + if (isConnected && activeConnection) { + connectorInfo[type].connected = true; - // Check token availability - try { - const tokenRes = await fetch( - `/api/connectors/${type}/token?connection_id=${activeConnection.connection_id}` - ); - if (tokenRes.ok) { - const tokenData = await tokenRes.json(); - if (tokenData.access_token) { - connectorInfo[type].hasToken = true; - } - } - } catch { - // Token check failed - } - } - } - } catch { - // Status check failed - } - } - } + // Check token availability + try { + const tokenRes = await fetch( + 
`/api/connectors/${type}/token?connection_id=${activeConnection.connection_id}`, + ); + if (tokenRes.ok) { + const tokenData = await tokenRes.json(); + if (tokenData.access_token) { + connectorInfo[type].hasToken = true; + } + } + } catch { + // Token check failed + } + } + } + } catch { + // Status check failed + } + } + } - setCloudConnectors(connectorInfo); - } - } catch (err) { - console.error("Failed to check availability", err); - } - }; - checkAvailability(); - }, []); + setCloudConnectors(connectorInfo); + } + } catch (err) { + console.error("Failed to check availability", err); + } + }; + checkAvailability(); + }, []); - // Handle click outside to close dropdown - useEffect(() => { - const handleClickOutside = (event: MouseEvent) => { - if ( - dropdownRef.current && - !dropdownRef.current.contains(event.target as Node) - ) { - setIsOpen(false); - } - }; + // Handle click outside to close dropdown + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if ( + dropdownRef.current && + !dropdownRef.current.contains(event.target as Node) + ) { + setIsOpen(false); + } + }; - if (isOpen) { - document.addEventListener("mousedown", handleClickOutside); - return () => - document.removeEventListener("mousedown", handleClickOutside); - } - }, [isOpen]); + if (isOpen) { + document.addEventListener("mousedown", handleClickOutside); + return () => + document.removeEventListener("mousedown", handleClickOutside); + } + }, [isOpen]); - const handleFileUpload = () => { - fileInputRef.current?.click(); - }; + const handleFileUpload = () => { + fileInputRef.current?.click(); + }; - const handleFileChange = async (e: React.ChangeEvent) => { - const files = e.target.files; - if (files && files.length > 0) { - // Close dropdown and disable button immediately after file selection - setIsOpen(false); - setFileUploading(true); + const handleFileChange = async (e: React.ChangeEvent) => { + const files = e.target.files; + if (files && files.length > 0) { + const file = files[0]; - // Trigger the same file upload event as the chat page - window.dispatchEvent( - new CustomEvent("fileUploadStart", { - detail: { filename: files[0].name }, - }) - ); + // Close dropdown immediately after file selection + setIsOpen(false); - try { - const formData = new FormData(); - formData.append("file", files[0]); + try { + // Check if filename already exists (using ORIGINAL filename) + console.log("[Duplicate Check] Checking file:", file.name); + const checkResponse = await fetch( + `/api/documents/check-filename?filename=${encodeURIComponent(file.name)}`, + ); - // Use router upload and ingest endpoint (automatically routes based on configuration) - const uploadIngestRes = await fetch("/api/router/upload_ingest", { - method: "POST", - body: formData, - }); + console.log("[Duplicate Check] Response status:", checkResponse.status); - const uploadIngestJson = await uploadIngestRes.json(); + if (!checkResponse.ok) { + const errorText = await checkResponse.text(); + console.error("[Duplicate Check] Error response:", errorText); + throw new Error( + `Failed to check duplicates: ${checkResponse.statusText}`, + ); + } - if (!uploadIngestRes.ok) { - throw new Error( - uploadIngestJson?.error || "Upload and ingest failed" - ); - } + const checkData = await checkResponse.json(); + console.log("[Duplicate Check] Result:", checkData); - // Extract results from the response - handle both unified and simple formats - const fileId = uploadIngestJson?.upload?.id || uploadIngestJson?.id; - const filePath = - 
uploadIngestJson?.upload?.path || - uploadIngestJson?.path || - "uploaded"; - const runJson = uploadIngestJson?.ingestion; - const deleteResult = uploadIngestJson?.deletion; + if (checkData.exists) { + // Show duplicate handling dialog + console.log("[Duplicate Check] Duplicate detected, showing dialog"); + setPendingFile(file); + setDuplicateFilename(file.name); + setShowDuplicateDialog(true); + // Reset file input + if (fileInputRef.current) { + fileInputRef.current.value = ""; + } + return; + } - if (!fileId) { - throw new Error("Upload successful but no file id returned"); - } + // No duplicate, proceed with upload + console.log("[Duplicate Check] No duplicate, proceeding with upload"); + await uploadFile(file, false); + } catch (error) { + console.error("[Duplicate Check] Exception:", error); + toast.error("Failed to check for duplicates", { + description: error instanceof Error ? error.message : "Unknown error", + }); + } + } - // Check if ingestion actually succeeded - if ( - runJson && - runJson.status !== "COMPLETED" && - runJson.status !== "SUCCESS" - ) { - const errorMsg = runJson.error || "Ingestion pipeline failed"; - throw new Error( - `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.` - ); - } + // Reset file input + if (fileInputRef.current) { + fileInputRef.current.value = ""; + } + }; - // Log deletion status if provided - if (deleteResult) { - if (deleteResult.status === "deleted") { - console.log( - "File successfully cleaned up from Langflow:", - deleteResult.file_id - ); - } else if (deleteResult.status === "delete_failed") { - console.warn( - "Failed to cleanup file from Langflow:", - deleteResult.error - ); - } - } + const uploadFile = async (file: File, replace: boolean) => { + setFileUploading(true); - // Notify UI - window.dispatchEvent( - new CustomEvent("fileUploaded", { - detail: { - file: files[0], - result: { - file_id: fileId, - file_path: filePath, - run: runJson, - deletion: deleteResult, - unified: true, - }, - }, - }) - ); + // Trigger the same file upload event as the chat page + window.dispatchEvent( + new CustomEvent("fileUploadStart", { + detail: { filename: file.name }, + }), + ); - // Trigger search refresh after successful ingestion - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - } catch (error) { - window.dispatchEvent( - new CustomEvent("fileUploadError", { - detail: { - filename: files[0].name, - error: error instanceof Error ? 
error.message : "Upload failed", - }, - }) - ); - } finally { - window.dispatchEvent(new CustomEvent("fileUploadComplete")); - setFileUploading(false); - // Don't call refetchSearch() here - the knowledgeUpdated event will handle it - } - } + try { + const formData = new FormData(); + formData.append("file", file); + formData.append("replace_duplicates", replace.toString()); - // Reset file input - if (fileInputRef.current) { - fileInputRef.current.value = ""; - } - }; + // Use router upload and ingest endpoint (automatically routes based on configuration) + const uploadIngestRes = await fetch("/api/router/upload_ingest", { + method: "POST", + body: formData, + }); - const handleFolderUpload = async () => { - if (!folderPath.trim()) return; + const uploadIngestJson = await uploadIngestRes.json(); - setFolderLoading(true); - setShowFolderDialog(false); + if (!uploadIngestRes.ok) { + throw new Error(uploadIngestJson?.error || "Upload and ingest failed"); + } - try { - const response = await fetch("/api/upload_path", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ path: folderPath }), - }); + // Extract results from the response - handle both unified and simple formats + const fileId = + uploadIngestJson?.upload?.id || + uploadIngestJson?.id || + uploadIngestJson?.task_id; + const filePath = + uploadIngestJson?.upload?.path || uploadIngestJson?.path || "uploaded"; + const runJson = uploadIngestJson?.ingestion; + const deleteResult = uploadIngestJson?.deletion; + console.log("c", uploadIngestJson); + if (!fileId) { + throw new Error("Upload successful but no file id returned"); + } + // Check if ingestion actually succeeded + if ( + runJson && + runJson.status !== "COMPLETED" && + runJson.status !== "SUCCESS" + ) { + const errorMsg = runJson.error || "Ingestion pipeline failed"; + throw new Error( + `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.`, + ); + } + // Log deletion status if provided + if (deleteResult) { + if (deleteResult.status === "deleted") { + console.log( + "File successfully cleaned up from Langflow:", + deleteResult.file_id, + ); + } else if (deleteResult.status === "delete_failed") { + console.warn( + "Failed to cleanup file from Langflow:", + deleteResult.error, + ); + } + } + // Notify UI + window.dispatchEvent( + new CustomEvent("fileUploaded", { + detail: { + file: file, + result: { + file_id: fileId, + file_path: filePath, + run: runJson, + deletion: deleteResult, + unified: true, + }, + }, + }), + ); - const result = await response.json(); + refetchTasks(); + } catch (error) { + window.dispatchEvent( + new CustomEvent("fileUploadError", { + detail: { + filename: file.name, + error: error instanceof Error ? 
error.message : "Upload failed", + }, + }), + ); + } finally { + window.dispatchEvent(new CustomEvent("fileUploadComplete")); + setFileUploading(false); + } + }; - if (response.status === 201) { - const taskId = result.task_id || result.id; + const handleOverwriteFile = async () => { + if (pendingFile) { + // Remove the old file from all search query caches before overwriting + queryClient.setQueriesData({ queryKey: ["search"] }, (oldData: []) => { + if (!oldData) return oldData; + // Filter out the file that's being overwritten + return oldData.filter( + (file: SearchFile) => file.filename !== pendingFile.name, + ); + }); - if (!taskId) { - throw new Error("No task ID received from server"); - } + await uploadFile(pendingFile, true); + setPendingFile(null); + setDuplicateFilename(""); + } + }; - addTask(taskId); - setFolderPath(""); - // Trigger search refresh after successful folder processing starts - console.log( - "Folder upload successful, dispatching knowledgeUpdated event" - ); - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - } else if (response.ok) { - setFolderPath(""); - console.log( - "Folder upload successful (direct), dispatching knowledgeUpdated event" - ); - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - } else { - console.error("Folder upload failed:", result.error); - if (response.status === 400) { - toast.error("Upload failed", { - description: result.error || "Bad request", - }); - } - } - } catch (error) { - console.error("Folder upload error:", error); - } finally { - setFolderLoading(false); - // Don't call refetchSearch() here - the knowledgeUpdated event will handle it - } - }; + const handleFolderUpload = async () => { + if (!folderPath.trim()) return; - const handleS3Upload = async () => { - if (!bucketUrl.trim()) return; + setFolderLoading(true); + setShowFolderDialog(false); - setS3Loading(true); - setShowS3Dialog(false); + try { + const response = await fetch("/api/upload_path", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ path: folderPath }), + }); - try { - const response = await fetch("/api/upload_bucket", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ s3_url: bucketUrl }), - }); + const result = await response.json(); - const result = await response.json(); + if (response.status === 201) { + const taskId = result.task_id || result.id; - if (response.status === 201) { - const taskId = result.task_id || result.id; + if (!taskId) { + throw new Error("No task ID received from server"); + } - if (!taskId) { - throw new Error("No task ID received from server"); - } + addTask(taskId); + setFolderPath(""); + // Refetch tasks to show the new task + refetchTasks(); + } else if (response.ok) { + setFolderPath(""); + // Refetch tasks even for direct uploads in case tasks were created + refetchTasks(); + } else { + console.error("Folder upload failed:", result.error); + if (response.status === 400) { + toast.error("Upload failed", { + description: result.error || "Bad request", + }); + } + } + } catch (error) { + console.error("Folder upload error:", error); + } finally { + setFolderLoading(false); + } + }; - addTask(taskId); - setBucketUrl("s3://"); - // Trigger search refresh after successful S3 processing starts - console.log("S3 upload successful, dispatching knowledgeUpdated event"); - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - } else { - console.error("S3 upload failed:", result.error); - if (response.status 
=== 400) { - toast.error("Upload failed", { - description: result.error || "Bad request", - }); - } - } - } catch (error) { - console.error("S3 upload error:", error); - } finally { - setS3Loading(false); - // Don't call refetchSearch() here - the knowledgeUpdated event will handle it - } - }; + const handleS3Upload = async () => { + if (!bucketUrl.trim()) return; - const cloudConnectorItems = Object.entries(cloudConnectors) - .filter(([, info]) => info.available) - .map(([type, info]) => ({ - label: info.name, - icon: PlugZap, - onClick: async () => { - setIsOpen(false); - if (info.connected && info.hasToken) { - setIsNavigatingToCloud(true); - try { - router.push(`/upload/${type}`); - // Keep loading state for a short time to show feedback - setTimeout(() => setIsNavigatingToCloud(false), 1000); - } catch { - setIsNavigatingToCloud(false); - } - } else { - router.push("/settings"); - } - }, - disabled: !info.connected || !info.hasToken, - tooltip: !info.connected - ? `Connect ${info.name} in Settings first` - : !info.hasToken - ? `Reconnect ${info.name} - access token required` - : undefined, - })); + setS3Loading(true); + setShowS3Dialog(false); - const menuItems = [ - { - label: "Add File", - icon: Upload, - onClick: handleFileUpload, - }, - { - label: "Process Folder", - icon: FolderOpen, - onClick: () => { - setIsOpen(false); - setShowFolderDialog(true); - }, - }, - ...(awsEnabled - ? [ - { - label: "Process S3 Bucket", - icon: Cloud, - onClick: () => { - setIsOpen(false); - setShowS3Dialog(true); - }, - }, - ] - : []), - ...cloudConnectorItems, - ]; + try { + const response = await fetch("/api/upload_bucket", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ s3_url: bucketUrl }), + }); - // Comprehensive loading state - const isLoading = - fileUploading || folderLoading || s3Loading || isNavigatingToCloud; + const result = await response.json(); - return ( - <> -
- + if (response.status === 201) { + const taskId = result.task_id || result.id; - {isOpen && !isLoading && ( -
-
- {menuItems.map((item, index) => ( - - ))} -
-
- )} + if (!taskId) { + throw new Error("No task ID received from server"); + } - -
+ addTask(taskId); + setBucketUrl("s3://"); + // Refetch tasks to show the new task + refetchTasks(); + } else { + console.error("S3 upload failed:", result.error); + if (response.status === 400) { + toast.error("Upload failed", { + description: result.error || "Bad request", + }); + } + } + } catch (error) { + console.error("S3 upload error:", error); + } finally { + setS3Loading(false); + } + }; - {/* Process Folder Dialog */} - - - - - - Process Folder - - - Process all documents in a folder path - - -
-
- - setFolderPath(e.target.value)} - /> -
-
- - -
-
-
-
+ const cloudConnectorItems = Object.entries(cloudConnectors) + .filter(([, info]) => info.available) + .map(([type, info]) => ({ + label: info.name, + icon: PlugZap, + onClick: async () => { + setIsOpen(false); + if (info.connected && info.hasToken) { + setIsNavigatingToCloud(true); + try { + router.push(`/upload/${type}`); + // Keep loading state for a short time to show feedback + setTimeout(() => setIsNavigatingToCloud(false), 1000); + } catch { + setIsNavigatingToCloud(false); + } + } else { + router.push("/settings"); + } + }, + disabled: !info.connected || !info.hasToken, + tooltip: !info.connected + ? `Connect ${info.name} in Settings first` + : !info.hasToken + ? `Reconnect ${info.name} - access token required` + : undefined, + })); - {/* Process S3 Bucket Dialog */} - - - - - - Process S3 Bucket - - - Process all documents from an S3 bucket. AWS credentials must be - configured. - - -
-
- - setBucketUrl(e.target.value)} - /> -
-
- - -
-
-
-
- - ); + const menuItems = [ + { + label: "Add File", + icon: Upload, + onClick: handleFileUpload, + }, + { + label: "Process Folder", + icon: FolderOpen, + onClick: () => { + setIsOpen(false); + setShowFolderDialog(true); + }, + }, + ...(awsEnabled + ? [ + { + label: "Process S3 Bucket", + icon: Cloud, + onClick: () => { + setIsOpen(false); + setShowS3Dialog(true); + }, + }, + ] + : []), + ...cloudConnectorItems, + ]; + + // Comprehensive loading state + const isLoading = + fileUploading || folderLoading || s3Loading || isNavigatingToCloud; + + return ( + <> +
+ + + {isOpen && !isLoading && ( +
+
+ {menuItems.map((item, index) => ( + + ))} +
+
+ )} + + +
+ + {/* Process Folder Dialog */} + + + + + + Process Folder + + + Process all documents in a folder path + + +
+
+ + setFolderPath(e.target.value)} + /> +
+
+ + +
+
+
+
+ + {/* Process S3 Bucket Dialog */} + + + + + + Process S3 Bucket + + + Process all documents from an S3 bucket. AWS credentials must be + configured. + + +
+
+ + setBucketUrl(e.target.value)} + /> +
+
+ + +
+
+
+
+ + {/* Duplicate Handling Dialog */} + + + ); } diff --git a/frontend/components/logo/ibm-logo.tsx b/frontend/components/logo/ibm-logo.tsx index 158ffa3b..e37adec1 100644 --- a/frontend/components/logo/ibm-logo.tsx +++ b/frontend/components/logo/ibm-logo.tsx @@ -9,7 +9,7 @@ export default function IBMLogo(props: React.SVGProps) { {...props} > IBM watsonx.ai Logo - + ( placeholder={placeholder} className={cn( "primary-input", - icon && "pl-9", + icon && "!pl-9", type === "password" && "!pr-8", icon ? inputClassName : className )} diff --git a/frontend/src/app/api/mutations/useCancelTaskMutation.ts b/frontend/src/app/api/mutations/useCancelTaskMutation.ts new file mode 100644 index 00000000..1bf2faed --- /dev/null +++ b/frontend/src/app/api/mutations/useCancelTaskMutation.ts @@ -0,0 +1,47 @@ +import { + type UseMutationOptions, + useMutation, + useQueryClient, +} from "@tanstack/react-query"; + +export interface CancelTaskRequest { + taskId: string; +} + +export interface CancelTaskResponse { + status: string; + task_id: string; +} + +export const useCancelTaskMutation = ( + options?: Omit< + UseMutationOptions, + "mutationFn" + > +) => { + const queryClient = useQueryClient(); + + async function cancelTask( + variables: CancelTaskRequest, + ): Promise { + const response = await fetch(`/api/tasks/${variables.taskId}/cancel`, { + method: "POST", + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error(errorData.error || "Failed to cancel task"); + } + + return response.json(); + } + + return useMutation({ + mutationFn: cancelTask, + onSuccess: () => { + // Invalidate tasks query to refresh the list + queryClient.invalidateQueries({ queryKey: ["tasks"] }); + }, + ...options, + }); +}; diff --git a/frontend/src/app/api/queries/useGetNudgesQuery.ts b/frontend/src/app/api/queries/useGetNudgesQuery.ts index a9fe37a4..2e313e0c 100644 --- a/frontend/src/app/api/queries/useGetNudgesQuery.ts +++ b/frontend/src/app/api/queries/useGetNudgesQuery.ts @@ -7,9 +7,6 @@ import { type Nudge = string; const DEFAULT_NUDGES = [ - "Show me this quarter's top 10 deals", - "Summarize recent client interactions", - "Search OpenSearch for mentions of our competitors", ]; export const useGetNudgesQuery = ( diff --git a/frontend/src/app/api/queries/useGetTasksQuery.ts b/frontend/src/app/api/queries/useGetTasksQuery.ts new file mode 100644 index 00000000..1ea59d26 --- /dev/null +++ b/frontend/src/app/api/queries/useGetTasksQuery.ts @@ -0,0 +1,79 @@ +import { + type UseQueryOptions, + useQuery, + useQueryClient, +} from "@tanstack/react-query"; + +export interface Task { + task_id: string; + status: + | "pending" + | "running" + | "processing" + | "completed" + | "failed" + | "error"; + total_files?: number; + processed_files?: number; + successful_files?: number; + failed_files?: number; + running_files?: number; + pending_files?: number; + created_at: string; + updated_at: string; + duration_seconds?: number; + result?: Record; + error?: string; + files?: Record>; +} + +export interface TasksResponse { + tasks: Task[]; +} + +export const useGetTasksQuery = ( + options?: Omit, "queryKey" | "queryFn"> +) => { + const queryClient = useQueryClient(); + + async function getTasks(): Promise { + const response = await fetch("/api/tasks"); + + if (!response.ok) { + throw new Error("Failed to fetch tasks"); + } + + const data: TasksResponse = await response.json(); + return data.tasks || []; + } + + const queryResult = useQuery( + { + queryKey: ["tasks"], + queryFn: getTasks, 
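+      // refetchInterval (below) receives the live query object, so the hook can
+      // inspect the freshest task list on every tick and stop polling on its own
+      // once no task is pending or running.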
+ refetchInterval: (query) => { + // Only poll if there are tasks with pending or running status + const data = query.state.data; + if (!data || data.length === 0) { + return false; // Stop polling if no tasks + } + + const hasActiveTasks = data.some( + (task: Task) => + task.status === "pending" || + task.status === "running" || + task.status === "processing" + ); + + return hasActiveTasks ? 3000 : false; // Poll every 3 seconds if active tasks exist + }, + refetchIntervalInBackground: true, + staleTime: 0, // Always consider data stale to ensure fresh updates + gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes + ...options, + }, + queryClient, + ); + + return queryResult; +}; diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index 6288bcfc..327bd884 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -1,201 +1,204 @@ "use client"; import { ArrowLeft, Check, Copy, Loader2, Search, X } from "lucide-react"; -import { Suspense, useCallback, useEffect, useMemo, useState } from "react"; import { useRouter, useSearchParams } from "next/navigation"; +import { Suspense, useCallback, useEffect, useMemo, useState } from "react"; +// import { Label } from "@/components/ui/label"; +// import { Checkbox } from "@/components/ui/checkbox"; +import { filterAccentClasses } from "@/components/knowledge-filter-panel"; import { ProtectedRoute } from "@/components/protected-route"; import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useLayout } from "@/contexts/layout-context"; import { useTask } from "@/contexts/task-context"; import { - type ChunkResult, - type File, - useGetSearchQuery, + type ChunkResult, + type File, + useGetSearchQuery, } from "../../api/queries/useGetSearchQuery"; -// import { Label } from "@/components/ui/label"; -// import { Checkbox } from "@/components/ui/checkbox"; -import { filterAccentClasses } from "@/components/knowledge-filter-panel"; const getFileTypeLabel = (mimetype: string) => { - if (mimetype === "application/pdf") return "PDF"; - if (mimetype === "text/plain") return "Text"; - if (mimetype === "application/msword") return "Word Document"; - return "Unknown"; + if (mimetype === "application/pdf") return "PDF"; + if (mimetype === "text/plain") return "Text"; + if (mimetype === "application/msword") return "Word Document"; + return "Unknown"; }; function ChunksPageContent() { - const router = useRouter(); - const searchParams = useSearchParams(); - const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = - useKnowledgeFilter(); - const { isMenuOpen } = useTask(); - const { totalTopOffset } = useLayout(); + const router = useRouter(); + const searchParams = useSearchParams(); + const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = + useKnowledgeFilter(); + const { isMenuOpen } = useTask(); + const { totalTopOffset } = useLayout(); - const filename = searchParams.get("filename"); - const [chunks, setChunks] = useState([]); - const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< - ChunkResult[] - >([]); - const [selectedChunks, setSelectedChunks] = useState>(new Set()); - const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState< - number | null - >(null); + const filename = 
searchParams.get("filename"); + const [chunks, setChunks] = useState([]); + const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< + ChunkResult[] + >([]); + const [selectedChunks, setSelectedChunks] = useState>(new Set()); + const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState< + number | null + >(null); - // Calculate average chunk length - const averageChunkLength = useMemo( - () => - chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / - chunks.length || 0, - [chunks] - ); + // Calculate average chunk length + const averageChunkLength = useMemo( + () => + chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / + chunks.length || 0, + [chunks], + ); - const [selectAll, setSelectAll] = useState(false); - const [queryInputText, setQueryInputText] = useState( - parsedFilterData?.query ?? "" - ); + const [selectAll, setSelectAll] = useState(false); + const [queryInputText, setQueryInputText] = useState( + parsedFilterData?.query ?? "", + ); - // Use the same search query as the knowledge page, but we'll filter for the specific file - const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); + // Use the same search query as the knowledge page, but we'll filter for the specific file + const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); - useEffect(() => { - if (queryInputText === "") { - setChunksFilteredByQuery(chunks); - } else { - setChunksFilteredByQuery( - chunks.filter((chunk) => - chunk.text.toLowerCase().includes(queryInputText.toLowerCase()) - ) - ); - } - }, [queryInputText, chunks]); + useEffect(() => { + if (queryInputText === "") { + setChunksFilteredByQuery(chunks); + } else { + setChunksFilteredByQuery( + chunks.filter((chunk) => + chunk.text.toLowerCase().includes(queryInputText.toLowerCase()), + ), + ); + } + }, [queryInputText, chunks]); - const handleCopy = useCallback((text: string, index: number) => { - // Trim whitespace and remove new lines/tabs for cleaner copy - navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, "")); - setActiveCopiedChunkIndex(index); - setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds - }, []); + const handleCopy = useCallback((text: string, index: number) => { + // Trim whitespace and remove new lines/tabs for cleaner copy + navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, "")); + setActiveCopiedChunkIndex(index); + setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds + }, []); - const fileData = (data as File[]).find( - (file: File) => file.filename === filename - ); + const fileData = (data as File[]).find( + (file: File) => file.filename === filename, + ); - // Extract chunks for the specific file - useEffect(() => { - if (!filename || !(data as File[]).length) { - setChunks([]); - return; - } + // Extract chunks for the specific file + useEffect(() => { + if (!filename || !(data as File[]).length) { + setChunks([]); + return; + } - setChunks( - fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || [] - ); - }, [data, filename]); + setChunks( + fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || [], + ); + }, [data, filename]); - // Set selected state for all checkboxes when selectAll changes - useEffect(() => { - if (selectAll) { - setSelectedChunks(new Set(chunks.map((_, index) => index))); - } else { - setSelectedChunks(new Set()); - } - }, [selectAll, setSelectedChunks, chunks]); + // Set selected state for all checkboxes when selectAll changes + 
useEffect(() => { + if (selectAll) { + setSelectedChunks(new Set(chunks.map((_, index) => index))); + } else { + setSelectedChunks(new Set()); + } + }, [selectAll, setSelectedChunks, chunks]); - const handleBack = useCallback(() => { - router.push("/knowledge"); - }, [router]); + const handleBack = useCallback(() => { + router.push("/knowledge"); + }, [router]); - // const handleChunkCardCheckboxChange = useCallback( - // (index: number) => { - // setSelectedChunks((prevSelected) => { - // const newSelected = new Set(prevSelected); - // if (newSelected.has(index)) { - // newSelected.delete(index); - // } else { - // newSelected.add(index); - // } - // return newSelected; - // }); - // }, - // [setSelectedChunks] - // ); + // const handleChunkCardCheckboxChange = useCallback( + // (index: number) => { + // setSelectedChunks((prevSelected) => { + // const newSelected = new Set(prevSelected); + // if (newSelected.has(index)) { + // newSelected.delete(index); + // } else { + // newSelected.add(index); + // } + // return newSelected; + // }); + // }, + // [setSelectedChunks] + // ); - if (!filename) { - return ( -
-
- -

No file specified

-

- Please select a file from the knowledge page -

-
-
- ); - } + if (!filename) { + return ( +
+
+ +

No file specified

+

+ Please select a file from the knowledge page +

+
+
+ ); + } - return ( -
-
- {/* Header */} -
-
- -

- {/* Removes file extension from filename */} - {filename.replace(/\.[^/.]+$/, "")} -

-
-
-
-
- {selectedFilter?.name && ( -
- {selectedFilter?.name} - setSelectedFilter(null)} - /> -
- )} - - setQueryInputText(e.target.value)} - value={queryInputText} - /> -
-
- {/*
+ return ( +
+
+ {/* Header */} +
+
+ +

+ {/* Removes file extension from filename */} + {filename.replace(/\.[^/.]+$/, "")} +

+
+
+
+
+ {selectedFilter?.name && ( +
+ {selectedFilter?.name} + setSelectedFilter(null)} + /> +
+ )} + + setQueryInputText(e.target.value)} + value={queryInputText} + /> +
+
+ {/*
*/} -
-
+
+
- {/* Content Area - matches knowledge page structure */} -
- {isFetching ? ( -
-
- -

- Loading chunks... -

-
-
- ) : chunks.length === 0 ? ( -
-
-

No knowledge

-

- Clear the knowledge filter or return to the knowledge page -

-
-
- ) : ( -
- {chunksFilteredByQuery.map((chunk, index) => ( -
-
-
- {/*
+ {/* Content Area - matches knowledge page structure */} +
+ {isFetching ? ( +
+
+ +

+ Loading chunks... +

+
+
+ ) : chunks.length === 0 ? ( +
+
+

No knowledge

+

+ Clear the knowledge filter or return to the knowledge page +

+
+
+ ) : ( +
+ {chunksFilteredByQuery.map((chunk, index) => ( +
+
+
+ {/*
@@ -250,73 +253,73 @@ function ChunksPageContent() { } />
*/} - - Chunk {chunk.index} - - - {chunk.text.length} chars - -
- -
-
+ + Chunk {chunk.index} + + + {chunk.text.length} chars + +
+ +
+
- - {chunk.score.toFixed(2)} score - + + {chunk.score.toFixed(2)} score + - {/* TODO: Update to use active toggle */} - {/* + {/* TODO: Update to use active toggle */} + {/* Active */} -
-
- {chunk.text} -
-
- ))} -
- )} -
-
- {/* Right panel - Summary (TODO), Technical details, */} - {chunks.length > 0 && ( -
-
-

- Technical details -

-
-
-
- Total chunks -
-
- {chunks.length} -
-
-
-
Avg length
-
- {averageChunkLength.toFixed(0)} chars -
-
- {/* TODO: Uncomment after data is available */} - {/*
+
+
+ {chunk.text} +
+
+ ))} +
+ )} +
+
+ {/* Right panel - Summary (TODO), Technical details, */} + {chunks.length > 0 && ( +
+
+

+ Technical details +

+
+
+
+ Total chunks +
+
+ {chunks.length} +
+
+
+
Avg length
+
+ {averageChunkLength.toFixed(0)} chars +
+
+ {/* TODO: Uncomment after data is available */} + {/*
Process time
@@ -326,79 +329,79 @@ function ChunksPageContent() {
*/} -
-
-
-

- Original document -

-
- {/*
+
+
+
+

+ Original document +

+
+ {/*
Name
{fileData?.filename}
*/} -
-
Type
-
- {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"} -
-
-
-
Size
-
- {fileData?.size - ? `${Math.round(fileData.size / 1024)} KB` - : "Unknown"} -
-
- {/*
+
+
Type
+
+ {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"} +
+
+
+
Size
+
+ {fileData?.size + ? `${Math.round(fileData.size / 1024)} KB` + : "Unknown"} +
+
+ {/*
Uploaded
N/A
*/} - {/* TODO: Uncomment after data is available */} - {/*
+ {/* TODO: Uncomment after data is available */} + {/*
Source
*/} - {/*
+ {/*
Updated
N/A
*/} -
-
-
- )} -
- ); + +
+
+ )} +
+ ); } function ChunksPage() { - return ( - -
- -

Loading...

-
-
- } - > - - - ); + return ( + +
+ +

Loading...

+
+
+ } + > + + + ); } export default function ProtectedChunksPage() { - return ( - - - - ); + return ( + + + + ); } diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx index d5ef211c..2cd6f382 100644 --- a/frontend/src/app/knowledge/page.tsx +++ b/frontend/src/app/knowledge/page.tsx @@ -1,10 +1,24 @@ "use client"; -import type { ColDef } from "ag-grid-community"; +import type { ColDef, GetRowIdParams } from "ag-grid-community"; import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react"; -import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react"; +import { + Building2, + Cloud, + Globe, + HardDrive, + Search, + Trash2, + X, +} from "lucide-react"; import { useRouter } from "next/navigation"; -import { type ChangeEvent, useCallback, useRef, useState } from "react"; +import { + type ChangeEvent, + useCallback, + useEffect, + useRef, + useState, +} from "react"; import { SiGoogledrive } from "react-icons/si"; import { TbBrandOnedrive } from "react-icons/tb"; import { KnowledgeDropdown } from "@/components/knowledge-dropdown"; @@ -18,14 +32,16 @@ import "@/components/AgGrid/registerAgGridModules"; import "@/components/AgGrid/agGridStyles.css"; import { toast } from "sonner"; import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown"; +import { filterAccentClasses } from "@/components/knowledge-filter-panel"; import { StatusBadge } from "@/components/ui/status-badge"; import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog"; import { useDeleteDocument } from "../api/mutations/useDeleteDocument"; -import { filterAccentClasses } from "@/components/knowledge-filter-panel"; // Function to get the appropriate icon for a connector type function getSourceIcon(connectorType?: string) { switch (connectorType) { + case "url": + return ; case "google_drive": return ( @@ -47,7 +63,7 @@ function getSourceIcon(connectorType?: string) { function SearchPage() { const router = useRouter(); - const { isMenuOpen, files: taskFiles } = useTask(); + const { isMenuOpen, files: taskFiles, refreshTasks } = useTask(); const { totalTopOffset } = useLayout(); const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = useKnowledgeFilter(); @@ -56,15 +72,14 @@ function SearchPage() { const deleteDocumentMutation = useDeleteDocument(); - const { data = [], isFetching } = useGetSearchQuery( + useEffect(() => { + refreshTasks(); + }, [refreshTasks]); + + const { data: searchData = [], isFetching } = useGetSearchQuery( parsedFilterData?.query || "*", - parsedFilterData + parsedFilterData, ); - - const handleTableSearch = (e: ChangeEvent) => { - gridRef.current?.api.setGridOption("quickFilterText", e.target.value); - }; - // Convert TaskFiles to File format and merge with backend results const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { return { @@ -77,13 +92,32 @@ function SearchPage() { }; }); - const backendFiles = data as File[]; + // Create a map of task files by filename for quick lookup + const taskFileMap = new Map( + taskFilesAsFiles.map((file) => [file.filename, file]), + ); + + // Override backend files with task file status if they exist + const backendFiles = (searchData as File[]) + .map((file) => { + const taskFile = taskFileMap.get(file.filename); + if (taskFile) { + // Override backend file with task file data (includes status) + return { ...file, ...taskFile }; + } + return file; + }) + .filter((file) => { + // Only filter out files that are currently processing AND 
in taskFiles
+      const taskFile = taskFileMap.get(file.filename);
+      return !taskFile || taskFile.status !== "processing";
+    });
 
   const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
     return (
       taskFile.status !== "active" &&
       !backendFiles.some(
-        (backendFile) => backendFile.filename === taskFile.filename
+        (backendFile) => backendFile.filename === taskFile.filename,
       )
     );
   });
@@ -91,41 +125,60 @@
   // Combine task files first, then backend files
   const fileResults = [...backendFiles, ...filteredTaskFiles];
 
+  const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
+    gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
+  };
+
   const gridRef = useRef(null);
 
-  const [columnDefs] = useState<ColDef[]>([
+  const columnDefs = [
     {
       field: "filename",
       headerName: "Source",
-      checkboxSelection: true,
+      checkboxSelection: (params: CustomCellRendererProps) =>
+        (params?.data?.status || "active") === "active",
       headerCheckboxSelection: true,
       initialFlex: 2,
       minWidth: 220,
       cellRenderer: ({ data, value }: CustomCellRendererProps) => {
+        // Read status directly from data on each render
+        const status = data?.status || "active";
+        const isActive = status === "active";
        return (
-
+
+
+ +
); }, }, { field: "size", headerName: "Size", - valueFormatter: (params) => + valueFormatter: (params: CustomCellRendererProps) => params.value ? `${Math.round(params.value / 1024)} KB` : "-", }, { @@ -135,13 +188,14 @@ function SearchPage() { { field: "owner", headerName: "Owner", - valueFormatter: (params) => + valueFormatter: (params: CustomCellRendererProps) => params.data?.owner_name || params.data?.owner_email || "—", }, { field: "chunkCount", headerName: "Chunks", - valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", + valueFormatter: (params: CustomCellRendererProps) => + params.data?.chunkCount?.toString() || "-", }, { field: "avgScore", @@ -159,6 +213,7 @@ function SearchPage() { field: "status", headerName: "Status", cellRenderer: ({ data }: CustomCellRendererProps) => { + console.log(data?.filename, data?.status, "b"); // Default to 'active' status if no status is provided const status = data?.status || "active"; return ; @@ -166,6 +221,10 @@ function SearchPage() { }, { cellRenderer: ({ data }: CustomCellRendererProps) => { + const status = data?.status || "active"; + if (status !== "active") { + return null; + } return ; }, cellStyle: { @@ -182,7 +241,7 @@ function SearchPage() { sortable: false, initialFlex: 0, }, - ]); + ]; const defaultColDef: ColDef = { resizable: false, @@ -204,7 +263,7 @@ function SearchPage() { try { // Delete each file individually since the API expects one filename at a time const deletePromises = selectedRows.map((row) => - deleteDocumentMutation.mutateAsync({ filename: row.filename }) + deleteDocumentMutation.mutateAsync({ filename: row.filename }), ); await Promise.all(deletePromises); @@ -212,7 +271,7 @@ function SearchPage() { toast.success( `Successfully deleted ${selectedRows.length} document${ selectedRows.length > 1 ? "s" : "" - }` + }`, ); setSelectedRows([]); setShowBulkDeleteDialog(false); @@ -225,7 +284,7 @@ function SearchPage() { toast.error( error instanceof Error ? error.message - : "Failed to delete some documents" + : "Failed to delete some documents", ); } }; @@ -270,16 +329,13 @@ function SearchPage() { />
)} - + @@ -317,7 +373,7 @@ function SearchPage() { []} defaultColDef={defaultColDef} loading={isFetching} ref={gridRef} @@ -325,7 +381,7 @@ function SearchPage() { rowSelection="multiple" rowMultiSelectWithClick={false} suppressRowClickSelection={true} - getRowId={(params) => params.data.filename} + getRowId={(params: GetRowIdParams) => params.data?.filename} domLayout="normal" onSelectionChanged={onSelectionChanged} noRowsOverlayComponent={() => ( diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx index a6e22ffd..a4101535 100644 --- a/frontend/src/app/settings/page.tsx +++ b/frontend/src/app/settings/page.tsx @@ -633,30 +633,54 @@ function KnowledgeSourcesPage() { {/* Conditional Sync Settings or No-Auth Message */} - { - isNoAuthMode ? ( - - - - Cloud connectors are only available with auth mode enabled - - - Please provide the following environment variables and - restart: - - - -
-
- # make here - https://console.cloud.google.com/apis/credentials -
-
GOOGLE_OAUTH_CLIENT_ID=
-
GOOGLE_OAUTH_CLIENT_SECRET=
-
-
-
- ) : null + { + isNoAuthMode ? ( + + + + Cloud connectors require authentication + + + Add the Google OAuth variables below to your .env{" "} + then restart the OpenRAG containers. + + + +
+
+
+ + 27 + + # Google OAuth +
+
+ + 28 + + # Create credentials here: +
+
+ + 29 + + + # https://console.cloud.google.com/apis/credentials + +
+
+
+ 30 + GOOGLE_OAUTH_CLIENT_ID= +
+
+ 31 + GOOGLE_OAUTH_CLIENT_SECRET= +
+
+
+
+ ) : null //
//
//

Sync Settings

diff --git a/frontend/src/components/task-notification-menu.tsx b/frontend/src/components/task-notification-menu.tsx index e17f9579..fed7e6f1 100644 --- a/frontend/src/components/task-notification-menu.tsx +++ b/frontend/src/components/task-notification-menu.tsx @@ -1,6 +1,6 @@ "use client" -import { useState } from 'react' +import { useEffect, useState } from 'react' import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react' import { Button } from '@/components/ui/button' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' @@ -8,9 +8,16 @@ import { Badge } from '@/components/ui/badge' import { useTask, Task } from '@/contexts/task-context' export function TaskNotificationMenu() { - const { tasks, isFetching, isMenuOpen, cancelTask } = useTask() + const { tasks, isFetching, isMenuOpen, isRecentTasksExpanded, cancelTask } = useTask() const [isExpanded, setIsExpanded] = useState(false) + // Sync local state with context state + useEffect(() => { + if (isRecentTasksExpanded) { + setIsExpanded(true) + } + }, [isRecentTasksExpanded]) + // Don't render if menu is closed if (!isMenuOpen) return null diff --git a/frontend/src/components/ui/animated-processing-icon.tsx b/frontend/src/components/ui/animated-processing-icon.tsx index eb36b2ab..51815414 100644 --- a/frontend/src/components/ui/animated-processing-icon.tsx +++ b/frontend/src/components/ui/animated-processing-icon.tsx @@ -1,26 +1,16 @@ -interface AnimatedProcessingIconProps { - className?: string; - size?: number; -} +import type { SVGProps } from "react"; -export const AnimatedProcessingIcon = ({ - className = "", - size = 10, -}: AnimatedProcessingIconProps) => { - const width = Math.round((size * 6) / 10); - const height = size; - - return ( - - + + + + + + + ); }; diff --git a/frontend/src/components/ui/status-badge.tsx b/frontend/src/components/ui/status-badge.tsx index d3b1a323..e57ad3b5 100644 --- a/frontend/src/components/ui/status-badge.tsx +++ b/frontend/src/components/ui/status-badge.tsx @@ -50,7 +50,7 @@ export const StatusBadge = ({ status, className }: StatusBadgeProps) => { }`} > {status === "processing" && ( - + )} {config.label}
diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index 5eb10ea9..12ad3c24 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -7,33 +7,18 @@ import { useCallback, useContext, useEffect, + useRef, useState, } from "react"; import { toast } from "sonner"; +import { useCancelTaskMutation } from "@/app/api/mutations/useCancelTaskMutation"; +import { + type Task, + useGetTasksQuery, +} from "@/app/api/queries/useGetTasksQuery"; import { useAuth } from "@/contexts/auth-context"; -export interface Task { - task_id: string; - status: - | "pending" - | "running" - | "processing" - | "completed" - | "failed" - | "error"; - total_files?: number; - processed_files?: number; - successful_files?: number; - failed_files?: number; - running_files?: number; - pending_files?: number; - created_at: string; - updated_at: string; - duration_seconds?: number; - result?: Record; - error?: string; - files?: Record>; -} +// Task interface is now imported from useGetTasksQuery export interface TaskFile { filename: string; @@ -51,27 +36,54 @@ interface TaskContextType { files: TaskFile[]; addTask: (taskId: string) => void; addFiles: (files: Partial[], taskId: string) => void; - removeTask: (taskId: string) => void; refreshTasks: () => Promise; cancelTask: (taskId: string) => Promise; isPolling: boolean; isFetching: boolean; isMenuOpen: boolean; toggleMenu: () => void; + isRecentTasksExpanded: boolean; + setRecentTasksExpanded: (expanded: boolean) => void; + // React Query states + isLoading: boolean; + error: Error | null; } const TaskContext = createContext(undefined); export function TaskProvider({ children }: { children: React.ReactNode }) { - const [tasks, setTasks] = useState([]); const [files, setFiles] = useState([]); - const [isPolling, setIsPolling] = useState(false); - const [isFetching, setIsFetching] = useState(false); const [isMenuOpen, setIsMenuOpen] = useState(false); + const [isRecentTasksExpanded, setIsRecentTasksExpanded] = useState(false); + const previousTasksRef = useRef([]); const { isAuthenticated, isNoAuthMode } = useAuth(); const queryClient = useQueryClient(); + // Use React Query hooks + const { + data: tasks = [], + isLoading, + error, + refetch: refetchTasks, + isFetching, + } = useGetTasksQuery({ + enabled: isAuthenticated || isNoAuthMode, + }); + + const cancelTaskMutation = useCancelTaskMutation({ + onSuccess: () => { + toast.success("Task cancelled", { + description: "Task has been cancelled successfully", + }); + }, + onError: (error) => { + toast.error("Failed to cancel task", { + description: error.message, + }); + }, + }); + const refetchSearch = useCallback(() => { queryClient.invalidateQueries({ queryKey: ["search"], @@ -99,265 +111,216 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { [], ); - const fetchTasks = useCallback(async () => { - if (!isAuthenticated && !isNoAuthMode) return; - - setIsFetching(true); - try { - const response = await fetch("/api/tasks"); - if (response.ok) { - const data = await response.json(); - const newTasks = data.tasks || []; - - // Update tasks and check for status changes in the same state update - setTasks((prevTasks) => { - // Check for newly completed tasks to show toasts - if (prevTasks.length > 0) { - newTasks.forEach((newTask: Task) => { - const oldTask = prevTasks.find( - (t) => t.task_id === newTask.task_id, - ); - - // Update or add files from task.files if available - if (newTask.files && typeof newTask.files === 
"object") { - const taskFileEntries = Object.entries(newTask.files); - const now = new Date().toISOString(); - - taskFileEntries.forEach(([filePath, fileInfo]) => { - if (typeof fileInfo === "object" && fileInfo) { - const fileName = filePath.split("/").pop() || filePath; - const fileStatus = fileInfo.status as string; - - // Map backend file status to our TaskFile status - let mappedStatus: TaskFile["status"]; - switch (fileStatus) { - case "pending": - case "running": - mappedStatus = "processing"; - break; - case "completed": - mappedStatus = "active"; - break; - case "failed": - mappedStatus = "failed"; - break; - default: - mappedStatus = "processing"; - } - - setFiles((prevFiles) => { - const existingFileIndex = prevFiles.findIndex( - (f) => - f.source_url === filePath && - f.task_id === newTask.task_id, - ); - - // Detect connector type based on file path or other indicators - let connectorType = "local"; - if (filePath.includes("/") && !filePath.startsWith("/")) { - // Likely S3 key format (bucket/path/file.ext) - connectorType = "s3"; - } - - const fileEntry: TaskFile = { - filename: fileName, - mimetype: "", // We don't have this info from the task - source_url: filePath, - size: 0, // We don't have this info from the task - connector_type: connectorType, - status: mappedStatus, - task_id: newTask.task_id, - created_at: - typeof fileInfo.created_at === "string" - ? fileInfo.created_at - : now, - updated_at: - typeof fileInfo.updated_at === "string" - ? fileInfo.updated_at - : now, - }; - - if (existingFileIndex >= 0) { - // Update existing file - const updatedFiles = [...prevFiles]; - updatedFiles[existingFileIndex] = fileEntry; - return updatedFiles; - } else { - // Add new file - return [...prevFiles, fileEntry]; - } - }); - } - }); - } - - if ( - oldTask && - oldTask.status !== "completed" && - newTask.status === "completed" - ) { - // Task just completed - show success toast - toast.success("Task completed successfully", { - description: `Task ${newTask.task_id} has finished processing.`, - action: { - label: "View", - onClick: () => console.log("View task", newTask.task_id), - }, - }); - refetchSearch(); - // Dispatch knowledge updated event for all knowledge-related pages - console.log( - "Task completed successfully, dispatching knowledgeUpdated event", - ); - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - - // Remove files for this completed task from the files list - setFiles((prevFiles) => - prevFiles.filter((file) => file.task_id !== newTask.task_id), - ); - } else if ( - oldTask && - oldTask.status !== "failed" && - oldTask.status !== "error" && - (newTask.status === "failed" || newTask.status === "error") - ) { - // Task just failed - show error toast - toast.error("Task failed", { - description: `Task ${newTask.task_id} failed: ${ - newTask.error || "Unknown error" - }`, - }); - - // Files will be updated to failed status by the file parsing logic above - } - }); - } - - return newTasks; - }); - } - } catch (error) { - console.error("Failed to fetch tasks:", error); - } finally { - setIsFetching(false); + // Handle task status changes and file updates + useEffect(() => { + if (tasks.length === 0) { + // Store current tasks as previous for next comparison + previousTasksRef.current = tasks; + return; } - }, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop! 
- const addTask = useCallback((taskId: string) => { - // Immediately start aggressive polling for the new task - let pollAttempts = 0; - const maxPollAttempts = 30; // Poll for up to 30 seconds + // Check for task status changes by comparing with previous tasks + tasks.forEach((currentTask) => { + const previousTask = previousTasksRef.current.find( + (prev) => prev.task_id === currentTask.task_id, + ); - const aggressivePoll = async () => { - try { - const response = await fetch("/api/tasks"); - if (response.ok) { - const data = await response.json(); - const newTasks = data.tasks || []; - const foundTask = newTasks.find( - (task: Task) => task.task_id === taskId, - ); + // Only show toasts if we have previous data and status has changed + if ( + (previousTask && previousTask.status !== currentTask.status) || + (!previousTask && previousTasksRef.current.length !== 0) + ) { + // Process files from failed task and add them to files list + if (currentTask.files && typeof currentTask.files === "object") { + const taskFileEntries = Object.entries(currentTask.files); + const now = new Date().toISOString(); - if (foundTask) { - // Task found! Update the tasks state - setTasks((prevTasks) => { - // Check if task is already in the list - const exists = prevTasks.some((t) => t.task_id === taskId); - if (!exists) { - return [...prevTasks, foundTask]; + taskFileEntries.forEach(([filePath, fileInfo]) => { + if (typeof fileInfo === "object" && fileInfo) { + // Use the filename from backend if available, otherwise extract from path + const fileName = + (fileInfo as any).filename || + filePath.split("/").pop() || + filePath; + const fileStatus = fileInfo.status as string; + + // Map backend file status to our TaskFile status + let mappedStatus: TaskFile["status"]; + switch (fileStatus) { + case "pending": + case "running": + mappedStatus = "processing"; + break; + case "completed": + mappedStatus = "active"; + break; + case "failed": + mappedStatus = "failed"; + break; + default: + mappedStatus = "processing"; } - // Update existing task - return prevTasks.map((t) => - t.task_id === taskId ? foundTask : t, - ); - }); - return; // Stop polling, we found it - } + + setFiles((prevFiles) => { + const existingFileIndex = prevFiles.findIndex( + (f) => + f.source_url === filePath && + f.task_id === currentTask.task_id, + ); + + // Detect connector type based on file path or other indicators + let connectorType = "local"; + if (filePath.includes("/") && !filePath.startsWith("/")) { + // Likely S3 key format (bucket/path/file.ext) + connectorType = "s3"; + } + + const fileEntry: TaskFile = { + filename: fileName, + mimetype: "", // We don't have this info from the task + source_url: filePath, + size: 0, // We don't have this info from the task + connector_type: connectorType, + status: mappedStatus, + task_id: currentTask.task_id, + created_at: + typeof fileInfo.created_at === "string" + ? fileInfo.created_at + : now, + updated_at: + typeof fileInfo.updated_at === "string" + ? 
fileInfo.updated_at + : now, + }; + + if (existingFileIndex >= 0) { + // Update existing file + const updatedFiles = [...prevFiles]; + updatedFiles[existingFileIndex] = fileEntry; + return updatedFiles; + } else { + // Add new file + return [...prevFiles, fileEntry]; + } + }); + } + }); } - } catch (error) { - console.error("Aggressive polling failed:", error); - } + if ( + previousTask && + previousTask.status !== "completed" && + currentTask.status === "completed" + ) { + // Task just completed - show success toast with file counts + const successfulFiles = currentTask.successful_files || 0; + const failedFiles = currentTask.failed_files || 0; - pollAttempts++; - if (pollAttempts < maxPollAttempts) { - // Continue polling every 1 second for new tasks - setTimeout(aggressivePoll, 1000); - } - }; + let description = ""; + if (failedFiles > 0) { + description = `${successfulFiles} file${ + successfulFiles !== 1 ? "s" : "" + } uploaded successfully, ${failedFiles} file${ + failedFiles !== 1 ? "s" : "" + } failed`; + } else { + description = `${successfulFiles} file${ + successfulFiles !== 1 ? "s" : "" + } uploaded successfully`; + } - // Start aggressive polling after a short delay to allow backend to process - setTimeout(aggressivePoll, 500); - }, []); + toast.success("Task completed", { + description, + action: { + label: "View", + onClick: () => { + setIsMenuOpen(true); + setIsRecentTasksExpanded(true); + }, + }, + }); + setTimeout(() => { + setFiles((prevFiles) => + prevFiles.filter( + (file) => + file.task_id !== currentTask.task_id || + file.status === "failed", + ), + ); + refetchSearch(); + }, 500); + } else if ( + previousTask && + previousTask.status !== "failed" && + previousTask.status !== "error" && + (currentTask.status === "failed" || currentTask.status === "error") + ) { + // Task just failed - show error toast + toast.error("Task failed", { + description: `Task ${currentTask.task_id} failed: ${ + currentTask.error || "Unknown error" + }`, + }); + } + } + }); + + // Store current tasks as previous for next comparison + previousTasksRef.current = tasks; + }, [tasks, refetchSearch]); + + const addTask = useCallback( + (_taskId: string) => { + // React Query will automatically handle polling when tasks are active + // Just trigger a refetch to get the latest data + setTimeout(() => { + refetchTasks(); + }, 500); + }, + [refetchTasks], + ); const refreshTasks = useCallback(async () => { - await fetchTasks(); - }, [fetchTasks]); + setFiles([]); + await refetchTasks(); + }, [refetchTasks]); - const removeTask = useCallback((taskId: string) => { - setTasks((prev) => prev.filter((task) => task.task_id !== taskId)); - }, []); const cancelTask = useCallback( async (taskId: string) => { - try { - const response = await fetch(`/api/tasks/${taskId}/cancel`, { - method: "POST", - }); - - if (response.ok) { - // Immediately refresh tasks to show the updated status - await fetchTasks(); - toast.success("Task cancelled", { - description: `Task ${taskId.substring(0, 8)}... has been cancelled`, - }); - } else { - const errorData = await response.json().catch(() => ({})); - throw new Error(errorData.error || "Failed to cancel task"); - } - } catch (error) { - console.error("Failed to cancel task:", error); - toast.error("Failed to cancel task", { - description: error instanceof Error ? 
error.message : "Unknown error", - }); - } + cancelTaskMutation.mutate({ taskId }); }, - [fetchTasks], + [cancelTaskMutation], ); const toggleMenu = useCallback(() => { setIsMenuOpen((prev) => !prev); }, []); - // Periodic polling for task updates - useEffect(() => { - if (!isAuthenticated && !isNoAuthMode) return; - - setIsPolling(true); - - // Initial fetch - fetchTasks(); - - // Set up polling interval - every 3 seconds (more responsive for active tasks) - const interval = setInterval(fetchTasks, 3000); - - return () => { - clearInterval(interval); - setIsPolling(false); - }; - }, [isAuthenticated, isNoAuthMode, fetchTasks]); + // Determine if we're polling based on React Query's refetch interval + const isPolling = + isFetching && + tasks.some( + (task) => + task.status === "pending" || + task.status === "running" || + task.status === "processing", + ); const value: TaskContextType = { tasks, files, addTask, addFiles, - removeTask, refreshTasks, cancelTask, isPolling, isFetching, isMenuOpen, toggleMenu, + isRecentTasksExpanded, + setRecentTasksExpanded: setIsRecentTasksExpanded, + isLoading, + error, }; return <TaskContext.Provider value={value}>{children}</TaskContext.Provider>; diff --git a/src/api/documents.py b/src/api/documents.py index 82afb349..048f746a 100644 --- a/src/api/documents.py +++ b/src/api/documents.py @@ -6,14 +6,13 @@ from config.settings import INDEX_NAME logger = get_logger(__name__) -async def delete_documents_by_filename(request: Request, document_service, session_manager): - """Delete all documents with a specific filename""" - data = await request.json() - filename = data.get("filename") - +async def check_filename_exists(request: Request, document_service, session_manager): + """Check if a document with a specific filename already exists""" + filename = request.query_params.get("filename") + + if not filename: - return JSONResponse({"error": "filename is required"}, status_code=400) - + return JSONResponse({"error": "filename parameter is required"}, status_code=400) + user = request.state.user jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token) @@ -22,34 +21,79 @@ async def delete_documents_by_filename(request: Request, document_service, sessi opensearch_client = session_manager.get_user_opensearch_client( user.user_id, jwt_token ) - + + # Search for any document with this exact filename + from utils.opensearch_queries import build_filename_search_body + + search_body = build_filename_search_body(filename, size=1, source=["filename"]) + + logger.debug(f"Checking filename existence: {filename}") + + response = await opensearch_client.search( + index=INDEX_NAME, + body=search_body + ) + + # Check if any hits were found + hits = response.get("hits", {}).get("hits", []) + exists = len(hits) > 0 + + logger.debug(f"Filename check result - exists: {exists}, hits: {len(hits)}") + + return JSONResponse({ + "exists": exists, + "filename": filename + }, status_code=200) + + except Exception as e: + logger.error("Error checking filename existence", filename=filename, error=str(e)) + error_str = str(e) + if "AuthenticationException" in error_str: + return JSONResponse({"error": "Access denied: insufficient permissions"}, status_code=403) + else: + return JSONResponse({"error": str(e)}, status_code=500) + + +async def delete_documents_by_filename(request: Request, document_service, session_manager): + """Delete all documents with a specific filename""" + data = await request.json() + filename = data.get("filename") + + if not filename: + return JSONResponse({"error": "filename is required"},
status_code=400) + + user = request.state.user + jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token) + + try: + # Get user's OpenSearch client + opensearch_client = session_manager.get_user_opensearch_client( + user.user_id, jwt_token + ) + # Delete by query to remove all chunks of this document - delete_query = { - "query": { - "bool": { - "must": [ - {"term": {"filename": filename}} - ] - } - } - } - + from utils.opensearch_queries import build_filename_delete_body + + delete_query = build_filename_delete_body(filename) + + logger.debug(f"Deleting documents with filename: {filename}") + result = await opensearch_client.delete_by_query( index=INDEX_NAME, body=delete_query, conflicts="proceed" ) - + deleted_count = result.get("deleted", 0) logger.info(f"Deleted {deleted_count} chunks for filename {filename}", user_id=user.user_id) - + return JSONResponse({ "success": True, "deleted_chunks": deleted_count, "filename": filename, "message": f"All documents with filename '{filename}' deleted successfully" }, status_code=200) - + except Exception as e: logger.error("Error deleting documents by filename", filename=filename, error=str(e)) error_str = str(e) diff --git a/src/api/langflow_files.py b/src/api/langflow_files.py index 4fa17315..0226d4d5 100644 --- a/src/api/langflow_files.py +++ b/src/api/langflow_files.py @@ -189,19 +189,20 @@ async def upload_and_ingest_user_file( # Create temporary file for task processing import tempfile import os - + # Read file content content = await upload_file.read() - - # Create temporary file + + # Create temporary file with the actual filename (not a temp prefix) + # Store in temp directory but use the real filename + temp_dir = tempfile.gettempdir() safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") - temp_fd, temp_path = tempfile.mkstemp( - suffix=f"_{safe_filename}" - ) + temp_path = os.path.join(temp_dir, safe_filename) + try: # Write content to temp file - with os.fdopen(temp_fd, 'wb') as temp_file: + with open(temp_path, 'wb') as temp_file: temp_file.write(content) logger.debug("Created temporary file for task processing", temp_path=temp_path) diff --git a/src/api/router.py b/src/api/router.py index 42080693..15a9b116 100644 --- a/src/api/router.py +++ b/src/api/router.py @@ -13,27 +13,27 @@ logger = get_logger(__name__) async def upload_ingest_router( - request: Request, - document_service=None, - langflow_file_service=None, + request: Request, + document_service=None, + langflow_file_service=None, session_manager=None, - task_service=None + task_service=None, ): """ Router endpoint that automatically routes upload requests based on configuration. - + - If DISABLE_INGEST_WITH_LANGFLOW is True: uses traditional OpenRAG upload (/upload) - If DISABLE_INGEST_WITH_LANGFLOW is False (default): uses Langflow upload-ingest via task service - + This provides a single endpoint that users can call regardless of backend configuration. All langflow uploads are processed as background tasks for better scalability. 
""" try: logger.debug( - "Router upload_ingest endpoint called", - disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW + "Router upload_ingest endpoint called", + disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW, ) - + # Route based on configuration if DISABLE_INGEST_WITH_LANGFLOW: # Route to traditional OpenRAG upload @@ -42,8 +42,10 @@ async def upload_ingest_router( else: # Route to Langflow upload and ingest using task service logger.debug("Routing to Langflow upload-ingest pipeline via task service") - return await langflow_upload_ingest_task(request, langflow_file_service, session_manager, task_service) - + return await langflow_upload_ingest_task( + request, langflow_file_service, session_manager, task_service + ) + except Exception as e: logger.error("Error in upload_ingest_router", error=str(e)) error_msg = str(e) @@ -57,17 +59,14 @@ async def upload_ingest_router( async def langflow_upload_ingest_task( - request: Request, - langflow_file_service, - session_manager, - task_service + request: Request, langflow_file_service, session_manager, task_service ): """Task-based langflow upload and ingest for single/multiple files""" try: logger.debug("Task-based langflow upload_ingest endpoint called") form = await request.form() upload_files = form.getlist("file") - + if not upload_files or len(upload_files) == 0: logger.error("No files provided in task-based upload request") return JSONResponse({"error": "Missing files"}, status_code=400) @@ -77,14 +76,16 @@ async def langflow_upload_ingest_task( settings_json = form.get("settings") tweaks_json = form.get("tweaks") delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true" + replace_duplicates = form.get("replace_duplicates", "false").lower() == "true" # Parse JSON fields if provided settings = None tweaks = None - + if settings_json: try: import json + settings = json.loads(settings_json) except json.JSONDecodeError as e: logger.error("Invalid settings JSON", error=str(e)) @@ -93,6 +94,7 @@ async def langflow_upload_ingest_task( if tweaks_json: try: import json + tweaks = json.loads(tweaks_json) except json.JSONDecodeError as e: logger.error("Invalid tweaks JSON", error=str(e)) @@ -106,28 +108,37 @@ async def langflow_upload_ingest_task( jwt_token = getattr(request.state, "jwt_token", None) if not user_id: - return JSONResponse({"error": "User authentication required"}, status_code=401) + return JSONResponse( + {"error": "User authentication required"}, status_code=401 + ) # Create temporary files for task processing import tempfile import os + temp_file_paths = [] - + original_filenames = [] + try: + # Create temp directory reference once + temp_dir = tempfile.gettempdir() + for upload_file in upload_files: # Read file content content = await upload_file.read() - - # Create temporary file + + # Store ORIGINAL filename (not transformed) + original_filenames.append(upload_file.filename) + + # Create temporary file with TRANSFORMED filename for filesystem safety + # Transform: spaces and / to underscore safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") - temp_fd, temp_path = tempfile.mkstemp( - suffix=f"_{safe_filename}" - ) - + temp_path = os.path.join(temp_dir, safe_filename) + # Write content to temp file - with os.fdopen(temp_fd, 'wb') as temp_file: + with open(temp_path, "wb") as temp_file: temp_file.write(content) - + temp_file_paths.append(temp_path) logger.debug( @@ -136,21 +147,22 @@ async def langflow_upload_ingest_task( user_id=user_id, has_settings=bool(settings), 
has_tweaks=bool(tweaks), - delete_after_ingest=delete_after_ingest + delete_after_ingest=delete_after_ingest, ) # Create langflow upload task - print(f"tweaks: {tweaks}") - print(f"settings: {settings}") - print(f"jwt_token: {jwt_token}") - print(f"user_name: {user_name}") - print(f"user_email: {user_email}") - print(f"session_id: {session_id}") - print(f"delete_after_ingest: {delete_after_ingest}") - print(f"temp_file_paths: {temp_file_paths}") + logger.debug( + f"Preparing to create langflow upload task: tweaks={tweaks}, settings={settings}, has_jwt_token={bool(jwt_token)}, user_name={user_name}, user_email={user_email}, session_id={session_id}, delete_after_ingest={delete_after_ingest}, temp_file_paths={temp_file_paths}", + ) + # Create a map between temp_file_paths and original_filenames + file_path_to_original_filename = dict(zip(temp_file_paths, original_filenames)) + logger.debug( + f"File path to original filename map: {file_path_to_original_filename}", + ) task_id = await task_service.create_langflow_upload_task( user_id=user_id, file_paths=temp_file_paths, + original_filenames=file_path_to_original_filename, langflow_file_service=langflow_file_service, session_manager=session_manager, jwt_token=jwt_token, @@ -160,23 +172,28 @@ tweaks=tweaks, settings=settings, delete_after_ingest=delete_after_ingest, + replace_duplicates=replace_duplicates, ) logger.debug("Langflow upload task created successfully", task_id=task_id) - - return JSONResponse({ - "task_id": task_id, - "message": f"Langflow upload task created for {len(upload_files)} file(s)", - "file_count": len(upload_files) - }, status_code=202) # 202 Accepted for async processing - + + return JSONResponse( + { + "task_id": task_id, + "message": f"Langflow upload task created for {len(upload_files)} file(s)", + "file_count": len(upload_files), + }, + status_code=202, + ) # 202 Accepted for async processing + except Exception: # Clean up temp files on error from utils.file_utils import safe_unlink + for temp_path in temp_file_paths: safe_unlink(temp_path) raise - except Exception as e: logger.error( "Task-based langflow upload_ingest endpoint failed", @@ -184,5 +201,6 @@ async def langflow_upload_ingest_task( error=str(e), ) import traceback + logger.error("Full traceback", traceback=traceback.format_exc()) return JSONResponse({"error": str(e)}, status_code=500) diff --git a/src/main.py b/src/main.py index 230ded79..bf6da342 100644 --- a/src/main.py +++ b/src/main.py @@ -953,6 +953,17 @@ async def create_app(): methods=["POST", "GET"], ), # Document endpoints + Route( + "/documents/check-filename", + require_auth(services["session_manager"])( + partial( + documents.check_filename_exists, + document_service=services["document_service"], + session_manager=services["session_manager"], + ) + ), + methods=["GET"], + ), Route( "/documents/delete-by-filename", require_auth(services["session_manager"])( diff --git a/src/models/processors.py b/src/models/processors.py index bd2118a9..4a5d96b5 100644 --- a/src/models/processors.py +++ b/src/models/processors.py @@ -55,6 +55,96 @@ class TaskProcessor: await asyncio.sleep(retry_delay) retry_delay *= 2 # Exponential backoff + async def check_filename_exists( + self, + filename: str, + opensearch_client, + ) -> bool: + """ + Check if a document with the given filename already exists in OpenSearch. + Returns True if any chunks with this filename exist.
+ """ + from config.settings import INDEX_NAME + from utils.opensearch_queries import build_filename_search_body + import asyncio + + max_retries = 3 + retry_delay = 1.0 + + for attempt in range(max_retries): + try: + # Search for any document with this exact filename + search_body = build_filename_search_body(filename, size=1, source=False) + + response = await opensearch_client.search( + index=INDEX_NAME, + body=search_body + ) + + # Check if any hits were found + hits = response.get("hits", {}).get("hits", []) + return len(hits) > 0 + + except (asyncio.TimeoutError, Exception) as e: + if attempt == max_retries - 1: + logger.error( + "OpenSearch filename check failed after retries", + filename=filename, + error=str(e), + attempt=attempt + 1 + ) + # On final failure, assume document doesn't exist (safer to reprocess than skip) + logger.warning( + "Assuming filename doesn't exist due to connection issues", + filename=filename + ) + return False + else: + logger.warning( + "OpenSearch filename check failed, retrying", + filename=filename, + error=str(e), + attempt=attempt + 1, + retry_in=retry_delay + ) + await asyncio.sleep(retry_delay) + retry_delay *= 2 # Exponential backoff + + async def delete_document_by_filename( + self, + filename: str, + opensearch_client, + ) -> None: + """ + Delete all chunks of a document with the given filename from OpenSearch. + """ + from config.settings import INDEX_NAME + from utils.opensearch_queries import build_filename_delete_body + + try: + # Delete all documents with this filename + delete_body = build_filename_delete_body(filename) + + response = await opensearch_client.delete_by_query( + index=INDEX_NAME, + body=delete_body + ) + + deleted_count = response.get("deleted", 0) + logger.info( + "Deleted existing document chunks", + filename=filename, + deleted_count=deleted_count + ) + + except Exception as e: + logger.error( + "Failed to delete existing document", + filename=filename, + error=str(e) + ) + raise + async def process_document_standard( self, file_path: str, @@ -527,6 +617,7 @@ class LangflowFileProcessor(TaskProcessor): tweaks: dict = None, settings: dict = None, delete_after_ingest: bool = True, + replace_duplicates: bool = False, ): super().__init__() self.langflow_file_service = langflow_file_service @@ -539,6 +630,7 @@ class LangflowFileProcessor(TaskProcessor): self.tweaks = tweaks or {} self.settings = settings self.delete_after_ingest = delete_after_ingest + self.replace_duplicates = replace_duplicates async def process_item( self, upload_task: UploadTask, item: str, file_task: FileTask @@ -554,37 +646,40 @@ class LangflowFileProcessor(TaskProcessor): file_task.updated_at = time.time() try: - # Compute hash and check if already exists - from utils.hash_utils import hash_id - file_hash = hash_id(item) + # Use the ORIGINAL filename stored in file_task (not the transformed temp path) + # This ensures we check/store the original filename with spaces, etc. 
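+            # A condensed sketch of the duplicate-handling decision implemented
+            # just below (all names are defined on this class; "client" stands in
+            # for the per-user OpenSearch client fetched a few lines down):
+            #
+            #   exists = await self.check_filename_exists(original_filename, client)
+            #   if exists and not self.replace_duplicates:
+            #       ...  # fail the FileTask so the UI can offer an explicit overwrite
+            #   elif exists and self.replace_duplicates:
+            #       await self.delete_document_by_filename(original_filename, client)
+            #   ...  # then upload the content under original_filename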
+ original_filename = file_task.filename or os.path.basename(item) - # Check if document already exists + # Check if document with same filename already exists opensearch_client = self.session_manager.get_user_opensearch_client( self.owner_user_id, self.jwt_token ) - if await self.check_document_exists(file_hash, opensearch_client): - file_task.status = TaskStatus.COMPLETED - file_task.result = {"status": "unchanged", "id": file_hash} + + filename_exists = await self.check_filename_exists(original_filename, opensearch_client) + + if filename_exists and not self.replace_duplicates: + # Duplicate exists and user hasn't confirmed replacement + file_task.status = TaskStatus.FAILED + file_task.error = f"File with name '{original_filename}' already exists" file_task.updated_at = time.time() - upload_task.successful_files += 1 + upload_task.failed_files += 1 return + elif filename_exists and self.replace_duplicates: + # Delete existing document before uploading new one + logger.info(f"Replacing existing document: {original_filename}") + await self.delete_document_by_filename(original_filename, opensearch_client) # Read file content for processing with open(item, 'rb') as f: content = f.read() - # Create file tuple for upload - temp_filename = os.path.basename(item) - # Extract original filename from temp file suffix (remove tmp prefix) - if "_" in temp_filename: - filename = temp_filename.split("_", 1)[1] # Get everything after first _ - else: - filename = temp_filename - content_type, _ = mimetypes.guess_type(filename) + # Create file tuple for upload using ORIGINAL filename + # This ensures the document is indexed with the original name + content_type, _ = mimetypes.guess_type(original_filename) if not content_type: content_type = 'application/octet-stream' - - file_tuple = (filename, content, content_type) + + file_tuple = (original_filename, content, content_type) # Get JWT token using same logic as DocumentFileProcessor # This will handle anonymous JWT creation if needed diff --git a/src/models/tasks.py b/src/models/tasks.py index 236927ab..253cabb5 100644 --- a/src/models/tasks.py +++ b/src/models/tasks.py @@ -20,7 +20,8 @@ class FileTask: retry_count: int = 0 created_at: float = field(default_factory=time.time) updated_at: float = field(default_factory=time.time) - + filename: Optional[str] = None # Original filename for display + @property def duration_seconds(self) -> float: """Duration in seconds from creation to last update""" diff --git a/src/services/task_service.py b/src/services/task_service.py index be5312a0..735ad483 100644 --- a/src/services/task_service.py +++ b/src/services/task_service.py @@ -1,6 +1,5 @@ import asyncio import random -from typing import Dict, Optional import time import uuid @@ -59,6 +58,7 @@ class TaskService: file_paths: list, langflow_file_service, session_manager, + original_filenames: dict | None = None, jwt_token: str = None, owner_name: str = None, owner_email: str = None, @@ -66,6 +66,7 @@ class TaskService: tweaks: dict = None, settings: dict = None, delete_after_ingest: bool = True, + replace_duplicates: bool = False, ) -> str: """Create a new upload task for Langflow file processing with upload and ingest""" # Use LangflowFileProcessor with user context @@ -82,18 +83,35 @@ class TaskService: tweaks=tweaks, settings=settings, delete_after_ingest=delete_after_ingest, + replace_duplicates=replace_duplicates, ) - return await self.create_custom_task(user_id, file_paths, processor) + return await self.create_custom_task(user_id, file_paths, processor, 
original_filenames) - async def create_custom_task(self, user_id: str, items: list, processor) -> str: + async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: dict | None = None) -> str: """Create a new task with custom processor for any type of items""" + import os # Store anonymous tasks under a stable key so they can be retrieved later store_user_id = user_id or AnonymousUser().user_id task_id = str(uuid.uuid4()) + + # Create file tasks with original filenames if provided + normalized_originals = ( + {str(k): v for k, v in original_filenames.items()} if original_filenames else {} + ) + file_tasks = { + str(item): FileTask( + file_path=str(item), + filename=normalized_originals.get( + str(item), os.path.basename(str(item)) + ), + ) + for item in items + } + upload_task = UploadTask( task_id=task_id, total_files=len(items), - file_tasks={str(item): FileTask(file_path=str(item)) for item in items}, + file_tasks=file_tasks, ) # Attach the custom processor to the task @@ -268,6 +286,7 @@ class TaskService: "created_at": file_task.created_at, "updated_at": file_task.updated_at, "duration_seconds": file_task.duration_seconds, + "filename": file_task.filename, } # Count running and pending files @@ -322,6 +341,7 @@ class TaskService: "created_at": file_task.created_at, "updated_at": file_task.updated_at, "duration_seconds": file_task.duration_seconds, + "filename": file_task.filename, } if file_task.status.value == "running": diff --git a/src/tui/managers/docling_manager.py b/src/tui/managers/docling_manager.py index 5b1100cc..6fecfff9 100644 --- a/src/tui/managers/docling_manager.py +++ b/src/tui/managers/docling_manager.py @@ -31,10 +31,14 @@ class DoclingManager: self._process: Optional[subprocess.Popen] = None self._port = 5001 - self._host = "127.0.0.1" + self._host = self._get_host_for_containers() # Get appropriate host IP based on runtime self._running = False self._external_process = False + # PID file to track docling-serve across sessions (in current working directory) + from pathlib import Path + self._pid_file = Path.cwd() / ".docling.pid" + # Log storage - simplified, no queue self._log_buffer: List[str] = [] self._max_log_lines = 1000 @@ -42,22 +46,198 @@ class DoclingManager: self._initialized = True - def cleanup(self): - """Cleanup resources and stop any running processes.""" - if self._process and self._process.poll() is None: - self._add_log_entry("Cleaning up docling-serve process on exit") - try: - self._process.terminate() - self._process.wait(timeout=5) - except subprocess.TimeoutExpired: - self._process.kill() - self._process.wait() - except Exception as e: - self._add_log_entry(f"Error during cleanup: {e}") + # Try to recover existing process from PID file + self._recover_from_pid_file() - self._running = False - self._process = None + def _get_host_for_containers(self) -> str: + """ + Return a host IP that containers can reach (a bridge/CNI gateway). + Prefers Docker/Podman network gateways; falls back to bridge interfaces. 
+ """ + import subprocess, json, shutil, re, logging + logger = logging.getLogger(__name__) + + def run(cmd, timeout=2, text=True): + return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout) + + gateways = [] + compose_gateways = [] # Highest priority - compose project networks + active_gateways = [] # Medium priority - networks with containers + + # ---- Docker: enumerate networks and collect gateways + if shutil.which("docker"): + try: + ls = run(["docker", "network", "ls", "--format", "{{.Name}}"]) + if ls.returncode == 0: + for name in filter(None, ls.stdout.splitlines()): + try: + insp = run(["docker", "network", "inspect", name, "--format", "{{json .}}"]) + if insp.returncode == 0 and insp.stdout.strip(): + nw = json.loads(insp.stdout)[0] if insp.stdout.strip().startswith("[") else json.loads(insp.stdout) + ipam = nw.get("IPAM", {}) + containers = nw.get("Containers", {}) + for cfg in ipam.get("Config", []) or []: + gw = cfg.get("Gateway") + if gw: + # Highest priority: compose networks (ending in _default) + if name.endswith("_default"): + compose_gateways.append(gw) + # Medium priority: networks with active containers + elif len(containers) > 0: + active_gateways.append(gw) + # Low priority: empty networks + else: + gateways.append(gw) + except Exception: + pass + except Exception: + pass + + # ---- Podman: enumerate networks and collect gateways (netavark) + if shutil.which("podman"): + try: + # modern podman supports JSON format + ls = run(["podman", "network", "ls", "--format", "json"]) + if ls.returncode == 0 and ls.stdout.strip(): + for net in json.loads(ls.stdout): + name = net.get("name") or net.get("Name") + if not name: + continue + try: + insp = run(["podman", "network", "inspect", name, "--format", "json"]) + if insp.returncode == 0 and insp.stdout.strip(): + arr = json.loads(insp.stdout) + for item in (arr if isinstance(arr, list) else [arr]): + for sn in item.get("subnets", []) or []: + gw = sn.get("gateway") + if gw: + # Prioritize compose/project networks + if name.endswith("_default") or "_" in name: + compose_gateways.append(gw) + else: + gateways.append(gw) + except Exception: + pass + except Exception: + pass + + # ---- Fallback: parse host interfaces for common bridges + if not gateways: + try: + if shutil.which("ip"): + show = run(["ip", "-o", "-4", "addr", "show"]) + if show.returncode == 0: + for line in show.stdout.splitlines(): + # e.g. "12: br-3f0f... inet 172.18.0.1/16 ..." 
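+                            # For the sample line above, the regex below captures
+                            # group(1) = the interface name ("br-3f0f...") and
+                            # group(2) = its IPv4 address ("172.18.0.1"); only
+                            # docker0 / br-* / cni* interfaces are then treated
+                            # as container bridge gateways.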
+ m = re.search(r"^\d+:\s+([a-zA-Z0-9_.:-]+)\s+.*\binet\s+(\d+\.\d+\.\d+\.\d+)/", line) + if not m: + continue + ifname, ip = m.group(1), m.group(2) + if ifname == "docker0" or ifname.startswith(("br-", "cni")): + gateways.append(ip) + else: + # As a last resort, try net-tools ifconfig output + if shutil.which("ifconfig"): + show = run(["ifconfig"]) + for block in show.stdout.split("\n\n"): + if any(block.strip().startswith(n) for n in ("docker0", "cni", "br-")): + m = re.search(r"inet (?:addr:)?(\d+\.\d+\.\d+\.\d+)", block) + if m: + gateways.append(m.group(1)) + except Exception: + pass + + # Dedup, prioritizing: 1) compose networks, 2) active networks, 3) all others + seen, uniq = set(), [] + # First: compose project networks (_default suffix) + for ip in compose_gateways: + if ip not in seen: + uniq.append(ip) + seen.add(ip) + # Second: networks with active containers + for ip in active_gateways: + if ip not in seen: + uniq.append(ip) + seen.add(ip) + # Third: all other gateways + for ip in gateways: + if ip not in seen: + uniq.append(ip) + seen.add(ip) + + if uniq: + if len(uniq) > 1: + logger.info("Container-reachable host IP candidates: %s", ", ".join(uniq)) + else: + logger.info("Container-reachable host IP: %s", uniq[0]) + return uniq[0] + + # Nothing found: warn clearly + logger.warning( + "No container bridge IP found. If using rootless Podman (slirp4netns), there is no host bridge; publish ports or use 10.0.2.2 from the container." + ) + # Returning localhost is honest only for same-namespace; keep it explicit: + return "127.0.0.1" + + def cleanup(self): + """Cleanup resources but keep docling-serve running across sessions.""" + # Don't stop the process on exit - let it persist + # Just clean up our references + self._add_log_entry("TUI exiting - docling-serve will continue running") + # Note: We keep the PID file so we can reconnect in future sessions + def _save_pid(self, pid: int) -> None: + """Save the process PID to a file for persistence across sessions.""" + try: + self._pid_file.write_text(str(pid)) + self._add_log_entry(f"Saved PID {pid} to {self._pid_file}") + except Exception as e: + self._add_log_entry(f"Failed to save PID file: {e}") + + def _load_pid(self) -> Optional[int]: + """Load the process PID from file.""" + try: + if self._pid_file.exists(): + pid_str = self._pid_file.read_text().strip() + if pid_str.isdigit(): + return int(pid_str) + except Exception as e: + self._add_log_entry(f"Failed to load PID file: {e}") + return None + + def _clear_pid_file(self) -> None: + """Remove the PID file.""" + try: + if self._pid_file.exists(): + self._pid_file.unlink() + self._add_log_entry("Cleared PID file") + except Exception as e: + self._add_log_entry(f"Failed to clear PID file: {e}") + + def _is_process_running(self, pid: int) -> bool: + """Check if a process with the given PID is running.""" + try: + # Send signal 0 to check if process exists (doesn't actually send a signal) + os.kill(pid, 0) + return True + except OSError: + return False + + def _recover_from_pid_file(self) -> None: + """Try to recover connection to existing docling-serve process from PID file.""" + pid = self._load_pid() + if pid is not None: + if self._is_process_running(pid): + self._add_log_entry(f"Recovered existing docling-serve process (PID: {pid})") + # Mark as external process since we didn't start it in this session + self._external_process = True + self._running = True + # Note: We don't have a Popen object for this process, but that's OK + # We'll detect it's running via the PID liveness check +
else: + self._add_log_entry(f"Stale PID file found (PID: {pid} not running)") + self._clear_pid_file() + def _add_log_entry(self, message: str) -> None: """Add a log entry to the buffer (thread-safe).""" timestamp = time.strftime("%Y-%m-%d %H:%M:%S") @@ -70,43 +250,35 @@ class DoclingManager: self._log_buffer = self._log_buffer[-self._max_log_lines:] def is_running(self) -> bool: - """Check if docling serve is running.""" - # First check our internal state - internal_running = self._running and self._process is not None and self._process.poll() is None - - # If we think it's not running, check if something is listening on the port - # This handles cases where docling-serve was started outside the TUI - if not internal_running: - try: - import socket - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.settimeout(0.5) - result = s.connect_ex((self._host, self._port)) - s.close() - - # If port is in use, something is running there - if result == 0: - # Only log this once when we first detect external process - if not self._external_process: - self._add_log_entry(f"Detected external docling-serve running on {self._host}:{self._port}") - # Set a flag to indicate this is an external process - self._external_process = True - return True - except Exception as e: - # Only log errors occasionally to avoid spam - if not hasattr(self, '_last_port_error') or self._last_port_error != str(e): - self._add_log_entry(f"Error checking port: {e}") - self._last_port_error = str(e) - else: - # If we started it, it's not external + """Check if docling serve is running (by PID only).""" + # Check if we have a direct process handle + if self._process is not None and self._process.poll() is None: + self._running = True self._external_process = False + return True - return internal_running + # Check if we have a PID from file + pid = self._load_pid() + if pid is not None and self._is_process_running(pid): + self._running = True + self._external_process = True + return True + + # No running process found + self._running = False + self._external_process = False + return False def get_status(self) -> Dict[str, Any]: """Get current status of docling serve.""" if self.is_running(): - pid = self._process.pid if self._process else None + # Try to get PID from process handle first, then from PID file + pid = None + if self._process: + pid = self._process.pid + else: + pid = self._load_pid() + return { "status": "running", "port": self._port, @@ -127,13 +299,28 @@ class DoclingManager: "pid": None } - async def start(self, port: int = 5001, host: str = "127.0.0.1", enable_ui: bool = False) -> Tuple[bool, str]: + async def start(self, port: int = 5001, host: Optional[str] = None, enable_ui: bool = False) -> Tuple[bool, str]: """Start docling serve as external process.""" if self.is_running(): return False, "Docling serve is already running" self._port = port - self._host = host + # Use provided host or the bridge IP we detected in __init__ + if host is not None: + self._host = host + # else: keep self._host as already set in __init__ + + # Check if port is already in use before trying to start + import socket + try: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(0.5) + result = s.connect_ex((self._host, self._port)) + s.close() + if result == 0: + return False, f"Port {self._port} on {self._host} is already in use by another process. Please stop it first." 
+ except Exception as e: + self._add_log_entry(f"Error checking port availability: {e}") # Clear log buffer when starting self._log_buffer = [] @@ -146,14 +333,14 @@ class DoclingManager: if shutil.which("uv") and (os.path.exists("pyproject.toml") or os.getenv("VIRTUAL_ENV")): cmd = [ "uv", "run", "python", "-m", "docling_serve", "run", - "--host", host, - "--port", str(port), + "--host", self._host, + "--port", str(self._port), ] else: cmd = [ sys.executable, "-m", "docling_serve", "run", - "--host", host, - "--port", str(port), + "--host", self._host, + "--port", str(self._port), ] if enable_ui: @@ -173,6 +360,9 @@ class DoclingManager: self._running = True self._add_log_entry("External process started") + # Save the PID to file for persistence + self._save_pid(self._process.pid) + # Start a thread to capture output self._start_output_capture() @@ -192,11 +382,11 @@ class DoclingManager: import socket s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.settimeout(0.5) - result = s.connect_ex((host, port)) + result = s.connect_ex((self._host, self._port)) s.close() if result == 0: - self._add_log_entry(f"Docling-serve is now listening on {host}:{port}") + self._add_log_entry(f"Docling-serve is now listening on {self._host}:{self._port}") break except: pass @@ -298,9 +488,12 @@ class DoclingManager: try: self._add_log_entry("Stopping docling-serve process") + pid_to_stop = None + if self._process: - # We started this process, so we can stop it directly - self._add_log_entry(f"Terminating our process (PID: {self._process.pid})") + # We have a direct process handle + pid_to_stop = self._process.pid + self._add_log_entry(f"Terminating our process (PID: {pid_to_stop})") self._process.terminate() # Wait for it to stop @@ -315,16 +508,32 @@ class DoclingManager: self._add_log_entry("Process force killed") elif self._external_process: - # This is an external process, we can't stop it directly - self._add_log_entry("Cannot stop external docling-serve process - it was started outside the TUI") - self._running = False - self._external_process = False - return False, "Cannot stop external docling-serve process. Please stop it manually." 
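+            # A minimal sketch of the PID-file stop path used below, assuming
+            # POSIX signal semantics (the numeric literals match these signals):
+            #
+            #   os.kill(pid, 0)   # liveness probe: raises OSError if pid is gone
+            #   os.kill(pid, 15)  # SIGTERM: ask the process to exit cleanly
+            #   # ...wait briefly for a graceful shutdown...
+            #   os.kill(pid, 9)   # SIGKILL: force-kill if it is still alive
+            #
+            # os.kill supports only a handful of signals on Windows, so this
+            # recovery path is effectively POSIX-only.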
+ # This is a process we recovered from PID file + pid_to_stop = self._load_pid() + if pid_to_stop and self._is_process_running(pid_to_stop): + self._add_log_entry(f"Stopping process from PID file (PID: {pid_to_stop})") + try: + os.kill(pid_to_stop, 15) # SIGTERM + # Wait a bit for graceful shutdown + await asyncio.sleep(2) + if self._is_process_running(pid_to_stop): + # Still running, force kill + self._add_log_entry(f"Force killing process (PID: {pid_to_stop})") + os.kill(pid_to_stop, 9) # SIGKILL + except Exception as e: + self._add_log_entry(f"Error stopping external process: {e}") + return False, f"Error stopping external process: {str(e)}" + else: + self._add_log_entry("External process not found") + return False, "Process not found" self._running = False self._process = None self._external_process = False + # Clear the PID file since we intentionally stopped the service + self._clear_pid_file() + self._add_log_entry("Docling serve stopped successfully") return True, "Docling serve stopped successfully" diff --git a/src/tui/screens/welcome.py b/src/tui/screens/welcome.py index c93f5561..217b0611 100644 --- a/src/tui/screens/welcome.py +++ b/src/tui/screens/welcome.py @@ -118,9 +118,16 @@ class WelcomeScreen(Screen): welcome_text.append(ascii_art, style="bold white") welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim") - if self.services_running: + # Check if all services are running + all_services_running = self.services_running and self.docling_running + + if all_services_running: welcome_text.append( - "✓ Services are currently running\n\n", style="bold green" + "✓ All services are running\n\n", style="bold green" + ) + elif self.services_running or self.docling_running: + welcome_text.append( + "⚠ Some services are running\n\n", style="bold yellow" ) elif self.has_oauth_config: welcome_text.append( @@ -140,16 +147,19 @@ class WelcomeScreen(Screen): buttons = [] - if self.services_running: - # Services running - show app link first, then stop services + # Check if all services (native + container) are running + all_services_running = self.services_running and self.docling_running + + if all_services_running: + # All services running - show app link first, then stop all buttons.append( Button("Launch OpenRAG", variant="success", id="open-app-btn") ) buttons.append( - Button("Stop Container Services", variant="error", id="stop-services-btn") + Button("Stop All Services", variant="error", id="stop-all-services-btn") ) else: - # Services not running - show setup options and start services + # Some or no services running - show setup options and start all if has_oauth: # If OAuth is configured, only show advanced setup buttons.append( @@ -165,25 +175,7 @@ class WelcomeScreen(Screen): ) buttons.append( - Button("Start Container Services", variant="primary", id="start-services-btn") - ) - - # Native services controls - if self.docling_running: - buttons.append( - Button( - "Stop Native Services", - variant="warning", - id="stop-native-services-btn", - ) - ) - else: - buttons.append( - Button( - "Start Native Services", - variant="primary", - id="start-native-services-btn", - ) + Button("Start All Services", variant="primary", id="start-all-services-btn") ) # Always show status option @@ -213,7 +205,7 @@ class WelcomeScreen(Screen): ) # Set default button focus - if self.services_running: + if self.services_running and self.docling_running: self.default_button_id = "open-app-btn" elif self.has_oauth_config: self.default_button_id = "advanced-setup-btn" @@ -234,7 
+226,7 @@ def _focus_appropriate_button(self) -> None: """Focus the appropriate button based on current state.""" try: - if self.services_running: + if self.services_running and self.docling_running: self.query_one("#open-app-btn").focus() elif self.has_oauth_config: self.query_one("#advanced-setup-btn").focus() @@ -253,20 +245,16 @@ self.action_monitor() elif event.button.id == "diagnostics-btn": self.action_diagnostics() - elif event.button.id == "start-services-btn": - self.action_start_stop_services() - elif event.button.id == "stop-services-btn": - self.action_start_stop_services() - elif event.button.id == "start-native-services-btn": - self.action_start_native_services() - elif event.button.id == "stop-native-services-btn": - self.action_stop_native_services() + elif event.button.id == "start-all-services-btn": + self.action_start_all_services() + elif event.button.id == "stop-all-services-btn": + self.action_stop_all_services() elif event.button.id == "open-app-btn": self.action_open_app() def action_default_action(self) -> None: """Handle Enter key - go to default action based on state.""" - if self.services_running: + if self.services_running and self.docling_running: self.action_open_app() elif self.has_oauth_config: self.action_full_setup() @@ -297,28 +285,13 @@ self.app.push_screen(DiagnosticsScreen()) - def action_start_stop_services(self) -> None: - """Start or stop all services (containers + docling).""" - if self.services_running: - # Stop services - show modal with progress - if self.container_manager.is_available(): - command_generator = self.container_manager.stop_services() - modal = CommandOutputModal( - "Stopping Services", - command_generator, - on_complete=self._on_services_operation_complete, - ) - self.app.push_screen(modal) - else: - # Start services - show modal with progress - if self.container_manager.is_available(): - command_generator = self.container_manager.start_services() - modal = CommandOutputModal( - "Starting Services", - command_generator, - on_complete=self._on_services_operation_complete, - ) - self.app.push_screen(modal) + def action_start_all_services(self) -> None: + """Start all services (containers first, then native).""" + self.run_worker(self._start_all_services()) + + def action_stop_all_services(self) -> None: + """Stop all services (containers first, then native).""" + self.run_worker(self._stop_all_services()) async def _on_services_operation_complete(self) -> None: """Handle completion of services start/stop operation.""" @@ -334,7 +307,7 @@ def _update_default_button(self) -> None: """Update the default button target based on state.""" - if self.services_running: + if self.services_running and self.docling_running: self.default_button_id = "open-app-btn" elif self.has_oauth_config: self.default_button_id = "advanced-setup-btn" @@ -362,51 +335,84 @@ self.call_after_refresh(self._focus_appropriate_button) - def action_start_native_services(self) -> None: - """Start native services (docling).""" - if self.docling_running: - self.notify("Native services are already running.", severity="warning") - return + async def _start_all_services(self) -> None: + """Start all services: containers first, then native services.""" + # Step 1: Start container services first (to create the network) + if self.container_manager.is_available(): + command_generator = self.container_manager.start_services() + modal
= CommandOutputModal( + "Starting Container Services", + command_generator, + on_complete=self._on_containers_started_start_native, + ) + self.app.push_screen(modal) + else: + self.notify("No container runtime available", severity="warning") + # Still try to start native services + await self._start_native_services_after_containers() - self.run_worker(self._start_native_services()) + async def _on_containers_started_start_native(self) -> None: + """Called after containers start successfully, now start native services.""" + # Update container state + self._detect_services_sync() - async def _start_native_services(self) -> None: - """Worker task to start native services.""" - try: + # Now start native services (docling-serve can now detect the compose network) + await self._start_native_services_after_containers() + + async def _start_native_services_after_containers(self) -> None: + """Start native services after containers have been started.""" + if not self.docling_manager.is_running(): + self.notify("Starting native services...", severity="information") success, message = await self.docling_manager.start() if success: - self.docling_running = True self.notify(message, severity="information") else: self.notify(f"Failed to start native services: {message}", severity="error") - except Exception as exc: - self.notify(f"Error starting native services: {exc}", severity="error") - finally: - self.docling_running = self.docling_manager.is_running() - await self._refresh_welcome_content() + else: + self.notify("Native services already running", severity="information") - def action_stop_native_services(self) -> None: - """Stop native services (docling).""" - if not self.docling_running and not self.docling_manager.is_running(): - self.notify("Native services are not running.", severity="warning") - return + # Update state + self.docling_running = self.docling_manager.is_running() + await self._refresh_welcome_content() - self.run_worker(self._stop_native_services()) + async def _stop_all_services(self) -> None: + """Stop all services: containers first, then native.""" + # Step 1: Stop container services + if self.container_manager.is_available() and self.services_running: + command_generator = self.container_manager.stop_services() + modal = CommandOutputModal( + "Stopping Container Services", + command_generator, + on_complete=self._on_stop_containers_complete, + ) + self.app.push_screen(modal) + else: + # No containers to stop, go directly to stopping native services + await self._stop_native_services_after_containers() - async def _stop_native_services(self) -> None: - """Worker task to stop native services.""" - try: + async def _on_stop_containers_complete(self) -> None: + """Called after containers are stopped, now stop native services.""" + # Update container state + self._detect_services_sync() + + # Now stop native services + await self._stop_native_services_after_containers() + + async def _stop_native_services_after_containers(self) -> None: + """Stop native services after containers have been stopped.""" + if self.docling_manager.is_running(): + self.notify("Stopping native services...", severity="information") success, message = await self.docling_manager.stop() if success: - self.docling_running = False self.notify(message, severity="information") else: self.notify(f"Failed to stop native services: {message}", severity="error") - except Exception as exc: - self.notify(f"Error stopping native services: {exc}", severity="error") - finally: - self.docling_running = 
self.docling_manager.is_running() - await self._refresh_welcome_content() + else: + self.notify("Native services already stopped", severity="information") + + # Update state + self.docling_running = self.docling_manager.is_running() + await self._refresh_welcome_content() def action_open_app(self) -> None: """Open the OpenRAG app in the default browser.""" diff --git a/src/utils/opensearch_queries.py b/src/utils/opensearch_queries.py new file mode 100644 index 00000000..f29c6283 --- /dev/null +++ b/src/utils/opensearch_queries.py @@ -0,0 +1,55 @@ +""" +Utility functions for constructing OpenSearch queries consistently. +""" +from typing import Union, List + + +def build_filename_query(filename: str) -> dict: + """ + Build a standardized query for finding documents by filename. + + Args: + filename: The exact filename to search for + + Returns: + A dict containing the OpenSearch query body + """ + return { + "term": { + "filename": filename + } + } + + +def build_filename_search_body(filename: str, size: int = 1, source: Union[bool, List[str]] = False) -> dict: + """ + Build a complete search body for checking if a filename exists. + + Args: + filename: The exact filename to search for + size: Number of results to return (default: 1) + source: Whether to include source fields, or list of specific fields to include (default: False) + + Returns: + A dict containing the complete OpenSearch search body + """ + return { + "query": build_filename_query(filename), + "size": size, + "_source": source + } + + +def build_filename_delete_body(filename: str) -> dict: + """ + Build a delete-by-query body for removing all documents with a filename. + + Args: + filename: The exact filename to delete + + Returns: + A dict containing the OpenSearch delete-by-query body + """ + return { + "query": build_filename_query(filename) + }
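
For reference, a minimal usage sketch of the new opensearch_queries helpers, mirroring what check_filename_exists and delete_documents_by_filename do above. It assumes an opensearch-py AsyncOpenSearch client and the INDEX_NAME setting used elsewhere in this diff; overwrite_document is an illustrative name, not part of the patch:

    from utils.opensearch_queries import (
        build_filename_delete_body,
        build_filename_search_body,
    )

    async def overwrite_document(client, index: str, filename: str) -> bool:
        """Delete any existing copy of `filename`; return True if one was found."""
        # Existence probe: one hit is enough, and no _source payload is needed.
        search_body = build_filename_search_body(filename, size=1, source=False)
        response = await client.search(index=index, body=search_body)
        exists = len(response.get("hits", {}).get("hits", [])) > 0

        if exists:
            # Remove every chunk indexed under this exact filename.
            await client.delete_by_query(
                index=index,
                body=build_filename_delete_body(filename),
                conflicts="proceed",  # tolerate version conflicts, as the API handler does
            )
        return exists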