From 7dfe90c8a6cfb9236dc66bdd908af1345ef21050 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira
Date: Thu, 2 Oct 2025 15:49:21 -0300
Subject: [PATCH 01/23] fixed ibm logo

---
 frontend/components/logo/ibm-logo.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/components/logo/ibm-logo.tsx b/frontend/components/logo/ibm-logo.tsx
index 158ffa3b..e37adec1 100644
--- a/frontend/components/logo/ibm-logo.tsx
+++ b/frontend/components/logo/ibm-logo.tsx
@@ -9,7 +9,7 @@ export default function IBMLogo(props: React.SVGProps<SVGSVGElement>) {
       {...props}
     >
       IBM watsonx.ai Logo
-
+

Date: Thu, 2 Oct 2025 16:50:34 -0300
Subject: [PATCH 02/23] implement tasks fetching and cancelling on useQuery

---
 .../api/mutations/useCancelTaskMutation.ts    | 47 +++++++++++
 .../app/api/queries/useGetTaskStatusQuery.ts  | 80 +++++++++++++++++++
 .../src/app/api/queries/useGetTasksQuery.ts   | 79 ++++++++++++++++++
 3 files changed, 206 insertions(+)
 create mode 100644 frontend/src/app/api/mutations/useCancelTaskMutation.ts
 create mode 100644 frontend/src/app/api/queries/useGetTaskStatusQuery.ts
 create mode 100644 frontend/src/app/api/queries/useGetTasksQuery.ts

diff --git a/frontend/src/app/api/mutations/useCancelTaskMutation.ts b/frontend/src/app/api/mutations/useCancelTaskMutation.ts
new file mode 100644
index 00000000..1bf2faed
--- /dev/null
+++ b/frontend/src/app/api/mutations/useCancelTaskMutation.ts
@@ -0,0 +1,47 @@
+import {
+  type UseMutationOptions,
+  useMutation,
+  useQueryClient,
+} from "@tanstack/react-query";
+
+export interface CancelTaskRequest {
+  taskId: string;
+}
+
+export interface CancelTaskResponse {
+  status: string;
+  task_id: string;
+}
+
+export const useCancelTaskMutation = (
+  options?: Omit<
+    UseMutationOptions<CancelTaskResponse, Error, CancelTaskRequest>,
+    "mutationFn"
+  >
+) => {
+  const queryClient = useQueryClient();
+
+  async function cancelTask(
+    variables: CancelTaskRequest,
+  ): Promise<CancelTaskResponse> {
+    const response = await fetch(`/api/tasks/${variables.taskId}/cancel`, {
+      method: "POST",
+    });
+
+    if (!response.ok) {
+      const errorData = await response.json().catch(() => ({}));
+      throw new Error(errorData.error || "Failed to cancel task");
+    }
+
+    return response.json();
+  }
+
+  return useMutation({
+    mutationFn: cancelTask,
+    onSuccess: () => {
+      // Invalidate tasks query to refresh the list
+      queryClient.invalidateQueries({ queryKey: ["tasks"] });
+    },
+    ...options,
+  });
+};

diff --git a/frontend/src/app/api/queries/useGetTaskStatusQuery.ts b/frontend/src/app/api/queries/useGetTaskStatusQuery.ts
new file mode 100644
index 00000000..17cd2d16
--- /dev/null
+++ b/frontend/src/app/api/queries/useGetTaskStatusQuery.ts
@@ -0,0 +1,80 @@
+import {
+  type UseQueryOptions,
+  useQuery,
+  useQueryClient,
+} from "@tanstack/react-query";
+
+export interface TaskStatus {
+  task_id: string;
+  status:
+    | "pending"
+    | "running"
+    | "processing"
+    | "completed"
+    | "failed"
+    | "error";
+  total_files?: number;
+  processed_files?: number;
+  successful_files?: number;
+  failed_files?: number;
+  running_files?: number;
+  pending_files?: number;
+  created_at: string;
+  updated_at: string;
+  duration_seconds?: number;
+  result?: Record<string, unknown>;
+  error?: string;
+  files?: Record<string, Record<string, unknown>>;
+}
+
+export const useGetTaskStatusQuery = (
+  taskId: string,
+  options?: Omit<UseQueryOptions<TaskStatus | null>, "queryKey" | "queryFn">
+) => {
+  const queryClient = useQueryClient();
+
+  async function getTaskStatus(): Promise<TaskStatus | null> {
+    if (!taskId) {
+      return null;
+    }
+
+    const response = await fetch(`/api/tasks/${taskId}`);
+
+    if (!response.ok) {
+      if (response.status === 404) {
+        return null; // Task not found
+      }
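+      // A 404 resolves to null ("task not found") rather than throwing, so
+      // consumers and the poller below can stop quietly once the task record
+      // is gone; any other failure surfaces to React Query as an error.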
+      throw new Error("Failed to fetch task status");
+    }
+
+    return response.json();
+  }
+
+  const queryResult = useQuery(
+    {
+      queryKey: ["task-status", taskId],
+      queryFn: getTaskStatus,
+      refetchInterval: (query) => {
+        // Only poll if the task is still active
+        const data = query.state.data;
+        if (!data) {
+          return false; // Stop polling if no data
+        }
+
+        const isActive =
+          data.status === "pending" ||
+          data.status === "running" ||
+          data.status === "processing";
+
+        return isActive ? 3000 : false; // Poll every 3 seconds if active
+      },
+      refetchIntervalInBackground: true,
+      staleTime: 0, // Always consider data stale to ensure fresh updates
+      gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
+      enabled: !!taskId, // Only run if taskId is provided
+      ...options,
+    },
+    queryClient,
+  );
+
+  return queryResult;
+};

diff --git a/frontend/src/app/api/queries/useGetTasksQuery.ts b/frontend/src/app/api/queries/useGetTasksQuery.ts
new file mode 100644
index 00000000..1ea59d26
--- /dev/null
+++ b/frontend/src/app/api/queries/useGetTasksQuery.ts
@@ -0,0 +1,79 @@
+import {
+  type UseQueryOptions,
+  useQuery,
+  useQueryClient,
+} from "@tanstack/react-query";
+
+export interface Task {
+  task_id: string;
+  status:
+    | "pending"
+    | "running"
+    | "processing"
+    | "completed"
+    | "failed"
+    | "error";
+  total_files?: number;
+  processed_files?: number;
+  successful_files?: number;
+  failed_files?: number;
+  running_files?: number;
+  pending_files?: number;
+  created_at: string;
+  updated_at: string;
+  duration_seconds?: number;
+  result?: Record<string, unknown>;
+  error?: string;
+  files?: Record<string, Record<string, unknown>>;
+}
+
+export interface TasksResponse {
+  tasks: Task[];
+}
+
+export const useGetTasksQuery = (
+  options?: Omit<UseQueryOptions<Task[]>, "queryKey" | "queryFn">
+) => {
+  const queryClient = useQueryClient();
+
+  async function getTasks(): Promise<Task[]> {
+    const response = await fetch("/api/tasks");
+
+    if (!response.ok) {
+      throw new Error("Failed to fetch tasks");
+    }
+
+    const data: TasksResponse = await response.json();
+    return data.tasks || [];
+  }
+
+  const queryResult = useQuery(
+    {
+      queryKey: ["tasks"],
+      queryFn: getTasks,
+      refetchInterval: (query) => {
+        // Only poll if there are tasks with pending or running status
+        const data = query.state.data;
+        if (!data || data.length === 0) {
+          return false; // Stop polling if no tasks
+        }
+
+        const hasActiveTasks = data.some(
+          (task: Task) =>
+            task.status === "pending" ||
+            task.status === "running" ||
+            task.status === "processing"
+        );
+
+        return hasActiveTasks ?
3000 : false; // Poll every 3 seconds if active tasks exist + }, + refetchIntervalInBackground: true, + staleTime: 0, // Always consider data stale to ensure fresh updates + gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes + ...options, + }, + queryClient, + ); + + return queryResult; +}; From aa61ba265c3a9744873c817c75779a15910fe9e7 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Thu, 2 Oct 2025 16:50:47 -0300 Subject: [PATCH 03/23] made files and toast appear just once, use new queries --- frontend/components/knowledge-dropdown.tsx | 1117 ++++++++++---------- frontend/src/contexts/task-context.tsx | 427 ++++---- 2 files changed, 733 insertions(+), 811 deletions(-) diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx index ee49fc3a..9b71ee81 100644 --- a/frontend/components/knowledge-dropdown.tsx +++ b/frontend/components/knowledge-dropdown.tsx @@ -1,24 +1,25 @@ "use client"; import { - ChevronDown, - Cloud, - FolderOpen, - Loader2, - PlugZap, - Plus, - Upload, + ChevronDown, + Cloud, + FolderOpen, + Loader2, + PlugZap, + Plus, + Upload, } from "lucide-react"; import { useRouter } from "next/navigation"; import { useEffect, useRef, useState } from "react"; import { toast } from "sonner"; +import { useGetTasksQuery } from "@/app/api/queries/useGetTasksQuery"; import { Button } from "@/components/ui/button"; import { - Dialog, - DialogContent, - DialogDescription, - DialogHeader, - DialogTitle, + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, } from "@/components/ui/dialog"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; @@ -26,600 +27,590 @@ import { useTask } from "@/contexts/task-context"; import { cn } from "@/lib/utils"; interface KnowledgeDropdownProps { - active?: boolean; - variant?: "navigation" | "button"; + active?: boolean; + variant?: "navigation" | "button"; } export function KnowledgeDropdown({ - active, - variant = "navigation", + active, + variant = "navigation", }: KnowledgeDropdownProps) { - const { addTask } = useTask(); - const router = useRouter(); - const [isOpen, setIsOpen] = useState(false); - const [showFolderDialog, setShowFolderDialog] = useState(false); - const [showS3Dialog, setShowS3Dialog] = useState(false); - const [awsEnabled, setAwsEnabled] = useState(false); - const [folderPath, setFolderPath] = useState("/app/documents/"); - const [bucketUrl, setBucketUrl] = useState("s3://"); - const [folderLoading, setFolderLoading] = useState(false); - const [s3Loading, setS3Loading] = useState(false); - const [fileUploading, setFileUploading] = useState(false); - const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false); - const [cloudConnectors, setCloudConnectors] = useState<{ - [key: string]: { - name: string; - available: boolean; - connected: boolean; - hasToken: boolean; - }; - }>({}); - const fileInputRef = useRef(null); - const dropdownRef = useRef(null); + const { addTask } = useTask(); + const { refetch: refetchTasks } = useGetTasksQuery(); + const router = useRouter(); + const [isOpen, setIsOpen] = useState(false); + const [showFolderDialog, setShowFolderDialog] = useState(false); + const [showS3Dialog, setShowS3Dialog] = useState(false); + const [awsEnabled, setAwsEnabled] = useState(false); + const [folderPath, setFolderPath] = useState("/app/documents/"); + const [bucketUrl, setBucketUrl] = useState("s3://"); + const [folderLoading, setFolderLoading] = useState(false); + const [s3Loading, setS3Loading] = 
useState(false); + const [fileUploading, setFileUploading] = useState(false); + const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false); + const [cloudConnectors, setCloudConnectors] = useState<{ + [key: string]: { + name: string; + available: boolean; + connected: boolean; + hasToken: boolean; + }; + }>({}); + const fileInputRef = useRef(null); + const dropdownRef = useRef(null); - // Check AWS availability and cloud connectors on mount - useEffect(() => { - const checkAvailability = async () => { - try { - // Check AWS - const awsRes = await fetch("/api/upload_options"); - if (awsRes.ok) { - const awsData = await awsRes.json(); - setAwsEnabled(Boolean(awsData.aws)); - } + // Check AWS availability and cloud connectors on mount + useEffect(() => { + const checkAvailability = async () => { + try { + // Check AWS + const awsRes = await fetch("/api/upload_options"); + if (awsRes.ok) { + const awsData = await awsRes.json(); + setAwsEnabled(Boolean(awsData.aws)); + } - // Check cloud connectors - const connectorsRes = await fetch("/api/connectors"); - if (connectorsRes.ok) { - const connectorsResult = await connectorsRes.json(); - const cloudConnectorTypes = [ - "google_drive", - "onedrive", - "sharepoint", - ]; - const connectorInfo: { - [key: string]: { - name: string; - available: boolean; - connected: boolean; - hasToken: boolean; - }; - } = {}; + // Check cloud connectors + const connectorsRes = await fetch("/api/connectors"); + if (connectorsRes.ok) { + const connectorsResult = await connectorsRes.json(); + const cloudConnectorTypes = [ + "google_drive", + "onedrive", + "sharepoint", + ]; + const connectorInfo: { + [key: string]: { + name: string; + available: boolean; + connected: boolean; + hasToken: boolean; + }; + } = {}; - for (const type of cloudConnectorTypes) { - if (connectorsResult.connectors[type]) { - connectorInfo[type] = { - name: connectorsResult.connectors[type].name, - available: connectorsResult.connectors[type].available, - connected: false, - hasToken: false, - }; + for (const type of cloudConnectorTypes) { + if (connectorsResult.connectors[type]) { + connectorInfo[type] = { + name: connectorsResult.connectors[type].name, + available: connectorsResult.connectors[type].available, + connected: false, + hasToken: false, + }; - // Check connection status - try { - const statusRes = await fetch(`/api/connectors/${type}/status`); - if (statusRes.ok) { - const statusData = await statusRes.json(); - const connections = statusData.connections || []; - const activeConnection = connections.find( - (conn: { is_active: boolean; connection_id: string }) => - conn.is_active - ); - const isConnected = activeConnection !== undefined; + // Check connection status + try { + const statusRes = await fetch(`/api/connectors/${type}/status`); + if (statusRes.ok) { + const statusData = await statusRes.json(); + const connections = statusData.connections || []; + const activeConnection = connections.find( + (conn: { is_active: boolean; connection_id: string }) => + conn.is_active, + ); + const isConnected = activeConnection !== undefined; - if (isConnected && activeConnection) { - connectorInfo[type].connected = true; + if (isConnected && activeConnection) { + connectorInfo[type].connected = true; - // Check token availability - try { - const tokenRes = await fetch( - `/api/connectors/${type}/token?connection_id=${activeConnection.connection_id}` - ); - if (tokenRes.ok) { - const tokenData = await tokenRes.json(); - if (tokenData.access_token) { - connectorInfo[type].hasToken = 
true; - } - } - } catch { - // Token check failed - } - } - } - } catch { - // Status check failed - } - } - } + // Check token availability + try { + const tokenRes = await fetch( + `/api/connectors/${type}/token?connection_id=${activeConnection.connection_id}`, + ); + if (tokenRes.ok) { + const tokenData = await tokenRes.json(); + if (tokenData.access_token) { + connectorInfo[type].hasToken = true; + } + } + } catch { + // Token check failed + } + } + } + } catch { + // Status check failed + } + } + } - setCloudConnectors(connectorInfo); - } - } catch (err) { - console.error("Failed to check availability", err); - } - }; - checkAvailability(); - }, []); + setCloudConnectors(connectorInfo); + } + } catch (err) { + console.error("Failed to check availability", err); + } + }; + checkAvailability(); + }, []); - // Handle click outside to close dropdown - useEffect(() => { - const handleClickOutside = (event: MouseEvent) => { - if ( - dropdownRef.current && - !dropdownRef.current.contains(event.target as Node) - ) { - setIsOpen(false); - } - }; + // Handle click outside to close dropdown + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if ( + dropdownRef.current && + !dropdownRef.current.contains(event.target as Node) + ) { + setIsOpen(false); + } + }; - if (isOpen) { - document.addEventListener("mousedown", handleClickOutside); - return () => - document.removeEventListener("mousedown", handleClickOutside); - } - }, [isOpen]); + if (isOpen) { + document.addEventListener("mousedown", handleClickOutside); + return () => + document.removeEventListener("mousedown", handleClickOutside); + } + }, [isOpen]); - const handleFileUpload = () => { - fileInputRef.current?.click(); - }; + const handleFileUpload = () => { + fileInputRef.current?.click(); + }; - const handleFileChange = async (e: React.ChangeEvent) => { - const files = e.target.files; - if (files && files.length > 0) { - // Close dropdown and disable button immediately after file selection - setIsOpen(false); - setFileUploading(true); + const handleFileChange = async (e: React.ChangeEvent) => { + const files = e.target.files; + if (files && files.length > 0) { + // Close dropdown and disable button immediately after file selection + setIsOpen(false); + setFileUploading(true); - // Trigger the same file upload event as the chat page - window.dispatchEvent( - new CustomEvent("fileUploadStart", { - detail: { filename: files[0].name }, - }) - ); + // Trigger the same file upload event as the chat page + window.dispatchEvent( + new CustomEvent("fileUploadStart", { + detail: { filename: files[0].name }, + }), + ); - try { - const formData = new FormData(); - formData.append("file", files[0]); + try { + const formData = new FormData(); + formData.append("file", files[0]); - // Use router upload and ingest endpoint (automatically routes based on configuration) - const uploadIngestRes = await fetch("/api/router/upload_ingest", { - method: "POST", - body: formData, - }); + // Use router upload and ingest endpoint (automatically routes based on configuration) + const uploadIngestRes = await fetch("/api/router/upload_ingest", { + method: "POST", + body: formData, + }); - const uploadIngestJson = await uploadIngestRes.json(); + const uploadIngestJson = await uploadIngestRes.json(); - if (!uploadIngestRes.ok) { - throw new Error( - uploadIngestJson?.error || "Upload and ingest failed" - ); - } + if (!uploadIngestRes.ok) { + throw new Error( + uploadIngestJson?.error || "Upload and ingest failed", + ); + } - // Extract results 
from the response - handle both unified and simple formats - const fileId = uploadIngestJson?.upload?.id || uploadIngestJson?.id; - const filePath = - uploadIngestJson?.upload?.path || - uploadIngestJson?.path || - "uploaded"; - const runJson = uploadIngestJson?.ingestion; - const deleteResult = uploadIngestJson?.deletion; + // Extract results from the response - handle both unified and simple formats + const fileId = uploadIngestJson?.upload?.id || uploadIngestJson?.id || uploadIngestJson?.task_id; + const filePath = + uploadIngestJson?.upload?.path || + uploadIngestJson?.path || + "uploaded"; + const runJson = uploadIngestJson?.ingestion; + const deleteResult = uploadIngestJson?.deletion; + console.log("c", uploadIngestJson ) + if (!fileId) { + throw new Error("Upload successful but no file id returned"); + } + // Check if ingestion actually succeeded + if ( + runJson && + runJson.status !== "COMPLETED" && + runJson.status !== "SUCCESS" + ) { + const errorMsg = runJson.error || "Ingestion pipeline failed"; + throw new Error( + `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.`, + ); + } + // Log deletion status if provided + if (deleteResult) { + if (deleteResult.status === "deleted") { + console.log( + "File successfully cleaned up from Langflow:", + deleteResult.file_id, + ); + } else if (deleteResult.status === "delete_failed") { + console.warn( + "Failed to cleanup file from Langflow:", + deleteResult.error, + ); + } + } + // Notify UI + window.dispatchEvent( + new CustomEvent("fileUploaded", { + detail: { + file: files[0], + result: { + file_id: fileId, + file_path: filePath, + run: runJson, + deletion: deleteResult, + unified: true, + }, + }, + }), + ); - if (!fileId) { - throw new Error("Upload successful but no file id returned"); - } + refetchTasks(); + } catch (error) { + window.dispatchEvent( + new CustomEvent("fileUploadError", { + detail: { + filename: files[0].name, + error: error instanceof Error ? error.message : "Upload failed", + }, + }), + ); + } finally { + window.dispatchEvent(new CustomEvent("fileUploadComplete")); + setFileUploading(false); + } + } - // Check if ingestion actually succeeded - if ( - runJson && - runJson.status !== "COMPLETED" && - runJson.status !== "SUCCESS" - ) { - const errorMsg = runJson.error || "Ingestion pipeline failed"; - throw new Error( - `Ingestion failed: ${errorMsg}. 
Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.` - ); - } + // Reset file input + if (fileInputRef.current) { + fileInputRef.current.value = ""; + } + }; - // Log deletion status if provided - if (deleteResult) { - if (deleteResult.status === "deleted") { - console.log( - "File successfully cleaned up from Langflow:", - deleteResult.file_id - ); - } else if (deleteResult.status === "delete_failed") { - console.warn( - "Failed to cleanup file from Langflow:", - deleteResult.error - ); - } - } + const handleFolderUpload = async () => { + if (!folderPath.trim()) return; - // Notify UI - window.dispatchEvent( - new CustomEvent("fileUploaded", { - detail: { - file: files[0], - result: { - file_id: fileId, - file_path: filePath, - run: runJson, - deletion: deleteResult, - unified: true, - }, - }, - }) - ); + setFolderLoading(true); + setShowFolderDialog(false); - // Trigger search refresh after successful ingestion - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - } catch (error) { - window.dispatchEvent( - new CustomEvent("fileUploadError", { - detail: { - filename: files[0].name, - error: error instanceof Error ? error.message : "Upload failed", - }, - }) - ); - } finally { - window.dispatchEvent(new CustomEvent("fileUploadComplete")); - setFileUploading(false); - // Don't call refetchSearch() here - the knowledgeUpdated event will handle it - } - } + try { + const response = await fetch("/api/upload_path", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ path: folderPath }), + }); - // Reset file input - if (fileInputRef.current) { - fileInputRef.current.value = ""; - } - }; + const result = await response.json(); - const handleFolderUpload = async () => { - if (!folderPath.trim()) return; + if (response.status === 201) { + const taskId = result.task_id || result.id; - setFolderLoading(true); - setShowFolderDialog(false); + if (!taskId) { + throw new Error("No task ID received from server"); + } - try { - const response = await fetch("/api/upload_path", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ path: folderPath }), - }); + addTask(taskId); + setFolderPath(""); + // Refetch tasks to show the new task + refetchTasks(); + } else if (response.ok) { + setFolderPath(""); + // Refetch tasks even for direct uploads in case tasks were created + refetchTasks(); + } else { + console.error("Folder upload failed:", result.error); + if (response.status === 400) { + toast.error("Upload failed", { + description: result.error || "Bad request", + }); + } + } + } catch (error) { + console.error("Folder upload error:", error); + } finally { + setFolderLoading(false); + } + }; - const result = await response.json(); + const handleS3Upload = async () => { + if (!bucketUrl.trim()) return; - if (response.status === 201) { - const taskId = result.task_id || result.id; + setS3Loading(true); + setShowS3Dialog(false); - if (!taskId) { - throw new Error("No task ID received from server"); - } + try { + const response = await fetch("/api/upload_bucket", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ s3_url: bucketUrl }), + }); - addTask(taskId); - setFolderPath(""); - // Trigger search refresh after successful folder processing starts - console.log( - "Folder upload successful, dispatching knowledgeUpdated event" - ); - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - } else if 
(response.ok) { - setFolderPath(""); - console.log( - "Folder upload successful (direct), dispatching knowledgeUpdated event" - ); - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - } else { - console.error("Folder upload failed:", result.error); - if (response.status === 400) { - toast.error("Upload failed", { - description: result.error || "Bad request", - }); - } - } - } catch (error) { - console.error("Folder upload error:", error); - } finally { - setFolderLoading(false); - // Don't call refetchSearch() here - the knowledgeUpdated event will handle it - } - }; + const result = await response.json(); - const handleS3Upload = async () => { - if (!bucketUrl.trim()) return; + if (response.status === 201) { + const taskId = result.task_id || result.id; - setS3Loading(true); - setShowS3Dialog(false); + if (!taskId) { + throw new Error("No task ID received from server"); + } - try { - const response = await fetch("/api/upload_bucket", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ s3_url: bucketUrl }), - }); + addTask(taskId); + setBucketUrl("s3://"); + // Refetch tasks to show the new task + refetchTasks(); + } else { + console.error("S3 upload failed:", result.error); + if (response.status === 400) { + toast.error("Upload failed", { + description: result.error || "Bad request", + }); + } + } + } catch (error) { + console.error("S3 upload error:", error); + } finally { + setS3Loading(false); + } + }; - const result = await response.json(); + const cloudConnectorItems = Object.entries(cloudConnectors) + .filter(([, info]) => info.available) + .map(([type, info]) => ({ + label: info.name, + icon: PlugZap, + onClick: async () => { + setIsOpen(false); + if (info.connected && info.hasToken) { + setIsNavigatingToCloud(true); + try { + router.push(`/upload/${type}`); + // Keep loading state for a short time to show feedback + setTimeout(() => setIsNavigatingToCloud(false), 1000); + } catch { + setIsNavigatingToCloud(false); + } + } else { + router.push("/settings"); + } + }, + disabled: !info.connected || !info.hasToken, + tooltip: !info.connected + ? `Connect ${info.name} in Settings first` + : !info.hasToken + ? `Reconnect ${info.name} - access token required` + : undefined, + })); - if (response.status === 201) { - const taskId = result.task_id || result.id; + const menuItems = [ + { + label: "Add File", + icon: Upload, + onClick: handleFileUpload, + }, + { + label: "Process Folder", + icon: FolderOpen, + onClick: () => { + setIsOpen(false); + setShowFolderDialog(true); + }, + }, + ...(awsEnabled + ? 
[ + { + label: "Process S3 Bucket", + icon: Cloud, + onClick: () => { + setIsOpen(false); + setShowS3Dialog(true); + }, + }, + ] + : []), + ...cloudConnectorItems, + ]; - if (!taskId) { - throw new Error("No task ID received from server"); - } + // Comprehensive loading state + const isLoading = + fileUploading || folderLoading || s3Loading || isNavigatingToCloud; - addTask(taskId); - setBucketUrl("s3://"); - // Trigger search refresh after successful S3 processing starts - console.log("S3 upload successful, dispatching knowledgeUpdated event"); - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - } else { - console.error("S3 upload failed:", result.error); - if (response.status === 400) { - toast.error("Upload failed", { - description: result.error || "Bad request", - }); - } - } - } catch (error) { - console.error("S3 upload error:", error); - } finally { - setS3Loading(false); - // Don't call refetchSearch() here - the knowledgeUpdated event will handle it - } - }; + return ( + <> +
+ - const cloudConnectorItems = Object.entries(cloudConnectors) - .filter(([, info]) => info.available) - .map(([type, info]) => ({ - label: info.name, - icon: PlugZap, - onClick: async () => { - setIsOpen(false); - if (info.connected && info.hasToken) { - setIsNavigatingToCloud(true); - try { - router.push(`/upload/${type}`); - // Keep loading state for a short time to show feedback - setTimeout(() => setIsNavigatingToCloud(false), 1000); - } catch { - setIsNavigatingToCloud(false); - } - } else { - router.push("/settings"); - } - }, - disabled: !info.connected || !info.hasToken, - tooltip: !info.connected - ? `Connect ${info.name} in Settings first` - : !info.hasToken - ? `Reconnect ${info.name} - access token required` - : undefined, - })); + {isOpen && !isLoading && ( +
+
+ {menuItems.map((item, index) => ( + + ))} +
+
+ )} - const menuItems = [ - { - label: "Add File", - icon: Upload, - onClick: handleFileUpload, - }, - { - label: "Process Folder", - icon: FolderOpen, - onClick: () => { - setIsOpen(false); - setShowFolderDialog(true); - }, - }, - ...(awsEnabled - ? [ - { - label: "Process S3 Bucket", - icon: Cloud, - onClick: () => { - setIsOpen(false); - setShowS3Dialog(true); - }, - }, - ] - : []), - ...cloudConnectorItems, - ]; + +
- // Comprehensive loading state - const isLoading = - fileUploading || folderLoading || s3Loading || isNavigatingToCloud; + {/* Process Folder Dialog */} + + + + + + Process Folder + + + Process all documents in a folder path + + +
+
+ + setFolderPath(e.target.value)} + /> +
+
+ + +
+
+
+
- return ( - <> -
- - - {isOpen && !isLoading && ( -
-
- {menuItems.map((item, index) => ( - - ))} -
-
- )} - - -
- - {/* Process Folder Dialog */} - - - - - - Process Folder - - - Process all documents in a folder path - - -
-
- - setFolderPath(e.target.value)} - /> -
-
- - -
-
-
-
- - {/* Process S3 Bucket Dialog */} - - - - - - Process S3 Bucket - - - Process all documents from an S3 bucket. AWS credentials must be - configured. - - -
-
- - setBucketUrl(e.target.value)} - /> -
-
- - -
-
-
-
- - ); + {/* Process S3 Bucket Dialog */} + + + + + + Process S3 Bucket + + + Process all documents from an S3 bucket. AWS credentials must be + configured. + + +
+
+ + setBucketUrl(e.target.value)} + /> +
+
+ + +
+
+
+
+ + ); } diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index 5eb10ea9..b3275422 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -7,33 +7,18 @@ import { useCallback, useContext, useEffect, + useRef, useState, } from "react"; import { toast } from "sonner"; +import { useCancelTaskMutation } from "@/app/api/mutations/useCancelTaskMutation"; +import { + type Task, + useGetTasksQuery, +} from "@/app/api/queries/useGetTasksQuery"; import { useAuth } from "@/contexts/auth-context"; -export interface Task { - task_id: string; - status: - | "pending" - | "running" - | "processing" - | "completed" - | "failed" - | "error"; - total_files?: number; - processed_files?: number; - successful_files?: number; - failed_files?: number; - running_files?: number; - pending_files?: number; - created_at: string; - updated_at: string; - duration_seconds?: number; - result?: Record; - error?: string; - files?: Record>; -} +// Task interface is now imported from useGetTasksQuery export interface TaskFile { filename: string; @@ -58,20 +43,45 @@ interface TaskContextType { isFetching: boolean; isMenuOpen: boolean; toggleMenu: () => void; + // React Query states + isLoading: boolean; + error: Error | null; } const TaskContext = createContext(undefined); export function TaskProvider({ children }: { children: React.ReactNode }) { - const [tasks, setTasks] = useState([]); const [files, setFiles] = useState([]); - const [isPolling, setIsPolling] = useState(false); - const [isFetching, setIsFetching] = useState(false); const [isMenuOpen, setIsMenuOpen] = useState(false); + const previousTasksRef = useRef([]); const { isAuthenticated, isNoAuthMode } = useAuth(); const queryClient = useQueryClient(); + // Use React Query hooks + const { + data: tasks = [], + isLoading, + error, + refetch: refetchTasks, + isFetching, + } = useGetTasksQuery({ + enabled: isAuthenticated || isNoAuthMode, + }); + + const cancelTaskMutation = useCancelTaskMutation({ + onSuccess: () => { + toast.success("Task cancelled", { + description: "Task has been cancelled successfully", + }); + }, + onError: (error) => { + toast.error("Failed to cancel task", { + description: error.message, + }); + }, + }); + const refetchSearch = useCallback(() => { queryClient.invalidateQueries({ queryKey: ["search"], @@ -99,252 +109,171 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { [], ); - const fetchTasks = useCallback(async () => { - if (!isAuthenticated && !isNoAuthMode) return; - - setIsFetching(true); - try { - const response = await fetch("/api/tasks"); - if (response.ok) { - const data = await response.json(); - const newTasks = data.tasks || []; - - // Update tasks and check for status changes in the same state update - setTasks((prevTasks) => { - // Check for newly completed tasks to show toasts - if (prevTasks.length > 0) { - newTasks.forEach((newTask: Task) => { - const oldTask = prevTasks.find( - (t) => t.task_id === newTask.task_id, - ); - - // Update or add files from task.files if available - if (newTask.files && typeof newTask.files === "object") { - const taskFileEntries = Object.entries(newTask.files); - const now = new Date().toISOString(); - - taskFileEntries.forEach(([filePath, fileInfo]) => { - if (typeof fileInfo === "object" && fileInfo) { - const fileName = filePath.split("/").pop() || filePath; - const fileStatus = fileInfo.status as string; - - // Map backend file status to our TaskFile status - let mappedStatus: 
TaskFile["status"]; - switch (fileStatus) { - case "pending": - case "running": - mappedStatus = "processing"; - break; - case "completed": - mappedStatus = "active"; - break; - case "failed": - mappedStatus = "failed"; - break; - default: - mappedStatus = "processing"; - } - - setFiles((prevFiles) => { - const existingFileIndex = prevFiles.findIndex( - (f) => - f.source_url === filePath && - f.task_id === newTask.task_id, - ); - - // Detect connector type based on file path or other indicators - let connectorType = "local"; - if (filePath.includes("/") && !filePath.startsWith("/")) { - // Likely S3 key format (bucket/path/file.ext) - connectorType = "s3"; - } - - const fileEntry: TaskFile = { - filename: fileName, - mimetype: "", // We don't have this info from the task - source_url: filePath, - size: 0, // We don't have this info from the task - connector_type: connectorType, - status: mappedStatus, - task_id: newTask.task_id, - created_at: - typeof fileInfo.created_at === "string" - ? fileInfo.created_at - : now, - updated_at: - typeof fileInfo.updated_at === "string" - ? fileInfo.updated_at - : now, - }; - - if (existingFileIndex >= 0) { - // Update existing file - const updatedFiles = [...prevFiles]; - updatedFiles[existingFileIndex] = fileEntry; - return updatedFiles; - } else { - // Add new file - return [...prevFiles, fileEntry]; - } - }); - } - }); - } - - if ( - oldTask && - oldTask.status !== "completed" && - newTask.status === "completed" - ) { - // Task just completed - show success toast - toast.success("Task completed successfully", { - description: `Task ${newTask.task_id} has finished processing.`, - action: { - label: "View", - onClick: () => console.log("View task", newTask.task_id), - }, - }); - refetchSearch(); - // Dispatch knowledge updated event for all knowledge-related pages - console.log( - "Task completed successfully, dispatching knowledgeUpdated event", - ); - window.dispatchEvent(new CustomEvent("knowledgeUpdated")); - - // Remove files for this completed task from the files list - setFiles((prevFiles) => - prevFiles.filter((file) => file.task_id !== newTask.task_id), - ); - } else if ( - oldTask && - oldTask.status !== "failed" && - oldTask.status !== "error" && - (newTask.status === "failed" || newTask.status === "error") - ) { - // Task just failed - show error toast - toast.error("Task failed", { - description: `Task ${newTask.task_id} failed: ${ - newTask.error || "Unknown error" - }`, - }); - - // Files will be updated to failed status by the file parsing logic above - } - }); - } - - return newTasks; - }); - } - } catch (error) { - console.error("Failed to fetch tasks:", error); - } finally { - setIsFetching(false); + // Handle task status changes and file updates + useEffect(() => { + if (tasks.length === 0) { + // Store current tasks as previous for next comparison + previousTasksRef.current = tasks; + return; } - }, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop! 
-  const addTask = useCallback((taskId: string) => {
-    // Immediately start aggressive polling for the new task
-    let pollAttempts = 0;
-    const maxPollAttempts = 30; // Poll for up to 30 seconds
+    // Check for task status changes by comparing with previous tasks
+    tasks.forEach((currentTask) => {
+      const previousTask = previousTasksRef.current.find(
+        (prev) => prev.task_id === currentTask.task_id,
+      );

-    const aggressivePoll = async () => {
-      try {
-        const response = await fetch("/api/tasks");
-        if (response.ok) {
-          const data = await response.json();
-          const newTasks = data.tasks || [];
-          const foundTask = newTasks.find(
-            (task: Task) => task.task_id === taskId,
-          );
+      // Only show toasts if we have previous data and status has changed
+      if (
+        (previousTask && previousTask.status !== currentTask.status) ||
+        (!previousTask && previousTasksRef.current.length !== 0)
+      ) {
+        // Mirror the files reported by the task into the local files list
+        if (currentTask.files && typeof currentTask.files === "object") {
+          const taskFileEntries = Object.entries(currentTask.files);
+          const now = new Date().toISOString();

-          if (foundTask) {
-            // Task found! Update the tasks state
-            setTasks((prevTasks) => {
-              // Check if task is already in the list
-              const exists = prevTasks.some((t) => t.task_id === taskId);
-              if (!exists) {
-                return [...prevTasks, foundTask];
+          taskFileEntries.forEach(([filePath, fileInfo]) => {
+            if (typeof fileInfo === "object" && fileInfo) {
+              const fileName = filePath.split("/").pop() || filePath;
+              const fileStatus = fileInfo.status as string;
+
+              // Map backend file status to our TaskFile status
+              let mappedStatus: TaskFile["status"];
+              switch (fileStatus) {
+                case "pending":
+                case "running":
+                  mappedStatus = "processing";
+                  break;
+                case "completed":
+                  mappedStatus = "active";
+                  break;
+                case "failed":
+                  mappedStatus = "failed";
+                  break;
+                default:
+                  mappedStatus = "processing";
               }
-
-            setFiles((prevFiles) => {
+
+              setFiles((prevFiles) => {
+                const existingFileIndex = prevFiles.findIndex(
+                  (f) =>
+                    f.source_url === filePath &&
+                    f.task_id === currentTask.task_id,
+                );
+
+                // Detect connector type based on file path or other indicators
+                let connectorType = "local";
+                if (filePath.includes("/") && !filePath.startsWith("/")) {
+                  // Likely S3 key format (bucket/path/file.ext)
+                  connectorType = "s3";
+                }
+
+                const fileEntry: TaskFile = {
+                  filename: fileName,
+                  mimetype: "", // We don't have this info from the task
+                  source_url: filePath,
+                  size: 0, // We don't have this info from the task
+                  connector_type: connectorType,
+                  status: mappedStatus,
+                  task_id: currentTask.task_id,
+                  created_at:
+                    typeof fileInfo.created_at === "string"
+                      ? fileInfo.created_at
+                      : now,
+                  updated_at:
+                    typeof fileInfo.updated_at === "string"
+                      ?
fileInfo.updated_at + : now, + }; + + if (existingFileIndex >= 0) { + // Update existing file + const updatedFiles = [...prevFiles]; + updatedFiles[existingFileIndex] = fileEntry; + return updatedFiles; + } else { + // Add new file + return [...prevFiles, fileEntry]; + } + }); + } + }); + } + if ( + previousTask && previousTask.status !== "completed" && + currentTask.status === "completed" + ) { + // Task just completed - show success toast + toast.success("Task completed successfully", { + description: `Task ${currentTask.task_id} has finished processing.`, + action: { + label: "View", + onClick: () => console.log("View task", currentTask.task_id), + }, + }); + refetchSearch(); + // Remove files for this completed task from the files list + // setFiles((prevFiles) => + // prevFiles.filter((file) => file.task_id !== currentTask.task_id), + // ); + } else if ( + previousTask && previousTask.status !== "failed" && + previousTask.status !== "error" && + (currentTask.status === "failed" || currentTask.status === "error") + ) { + // Task just failed - show error toast + toast.error("Task failed", { + description: `Task ${currentTask.task_id} failed: ${ + currentTask.error || "Unknown error" + }`, + }); } - } catch (error) { - console.error("Aggressive polling failed:", error); } + }); - pollAttempts++; - if (pollAttempts < maxPollAttempts) { - // Continue polling every 1 second for new tasks - setTimeout(aggressivePoll, 1000); - } - }; + // Store current tasks as previous for next comparison + previousTasksRef.current = tasks; + }, [tasks, refetchSearch]); - // Start aggressive polling after a short delay to allow backend to process - setTimeout(aggressivePoll, 500); - }, []); + const addTask = useCallback( + (_taskId: string) => { + // React Query will automatically handle polling when tasks are active + // Just trigger a refetch to get the latest data + refetchTasks(); + }, + [refetchTasks], + ); const refreshTasks = useCallback(async () => { - await fetchTasks(); - }, [fetchTasks]); + await refetchTasks(); + }, [refetchTasks]); - const removeTask = useCallback((taskId: string) => { - setTasks((prev) => prev.filter((task) => task.task_id !== taskId)); + const removeTask = useCallback((_taskId: string) => { + // This is now handled by React Query automatically + // Tasks will be removed from the list when they're no longer returned by the API }, []); const cancelTask = useCallback( async (taskId: string) => { - try { - const response = await fetch(`/api/tasks/${taskId}/cancel`, { - method: "POST", - }); - - if (response.ok) { - // Immediately refresh tasks to show the updated status - await fetchTasks(); - toast.success("Task cancelled", { - description: `Task ${taskId.substring(0, 8)}... has been cancelled`, - }); - } else { - const errorData = await response.json().catch(() => ({})); - throw new Error(errorData.error || "Failed to cancel task"); - } - } catch (error) { - console.error("Failed to cancel task:", error); - toast.error("Failed to cancel task", { - description: error instanceof Error ? 
error.message : "Unknown error", - }); - } + cancelTaskMutation.mutate({ taskId }); }, - [fetchTasks], + [cancelTaskMutation], ); const toggleMenu = useCallback(() => { setIsMenuOpen((prev) => !prev); }, []); - // Periodic polling for task updates - useEffect(() => { - if (!isAuthenticated && !isNoAuthMode) return; - - setIsPolling(true); - - // Initial fetch - fetchTasks(); - - // Set up polling interval - every 3 seconds (more responsive for active tasks) - const interval = setInterval(fetchTasks, 3000); - - return () => { - clearInterval(interval); - setIsPolling(false); - }; - }, [isAuthenticated, isNoAuthMode, fetchTasks]); + // Determine if we're polling based on React Query's refetch interval + const isPolling = + isFetching && + tasks.some( + (task) => + task.status === "pending" || + task.status === "running" || + task.status === "processing", + ); const value: TaskContextType = { tasks, @@ -358,6 +287,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { isFetching, isMenuOpen, toggleMenu, + isLoading, + error, }; return {children}; From db715c122bf42dfd9a049607eda247fd5f18ff25 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Thu, 2 Oct 2025 18:00:09 -0300 Subject: [PATCH 04/23] made failed and processing files not accessible and deletable --- frontend/src/app/knowledge/page.tsx | 604 ++++++++++++++-------------- 1 file changed, 307 insertions(+), 297 deletions(-) diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx index 1b8b60ef..41485289 100644 --- a/frontend/src/app/knowledge/page.tsx +++ b/frontend/src/app/knowledge/page.tsx @@ -17,270 +17,280 @@ import "@/components/AgGrid/registerAgGridModules"; import "@/components/AgGrid/agGridStyles.css"; import { toast } from "sonner"; import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown"; +import { filterAccentClasses } from "@/components/knowledge-filter-panel"; import { StatusBadge } from "@/components/ui/status-badge"; import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog"; import { useDeleteDocument } from "../api/mutations/useDeleteDocument"; -import { filterAccentClasses } from "@/components/knowledge-filter-panel"; // Function to get the appropriate icon for a connector type function getSourceIcon(connectorType?: string) { - switch (connectorType) { - case "google_drive": - return ( - - ); - case "onedrive": - return ( - - ); - case "sharepoint": - return ; - case "s3": - return ; - default: - return ( - - ); - } + switch (connectorType) { + case "google_drive": + return ( + + ); + case "onedrive": + return ( + + ); + case "sharepoint": + return ; + case "s3": + return ; + default: + return ( + + ); + } } function SearchPage() { - const router = useRouter(); - const { isMenuOpen, files: taskFiles } = useTask(); - const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = - useKnowledgeFilter(); - const [selectedRows, setSelectedRows] = useState([]); - const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false); + const router = useRouter(); + const { isMenuOpen, files: taskFiles } = useTask(); + const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = + useKnowledgeFilter(); + const [selectedRows, setSelectedRows] = useState([]); + const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false); - const deleteDocumentMutation = useDeleteDocument(); + const deleteDocumentMutation = useDeleteDocument(); - const { data = [], isFetching } = 
useGetSearchQuery( - parsedFilterData?.query || "*", - parsedFilterData - ); + const { data = [], isFetching } = useGetSearchQuery( + parsedFilterData?.query || "*", + parsedFilterData, + ); - const handleTableSearch = (e: ChangeEvent) => { - gridRef.current?.api.setGridOption("quickFilterText", e.target.value); - }; + const handleTableSearch = (e: ChangeEvent) => { + gridRef.current?.api.setGridOption("quickFilterText", e.target.value); + }; - // Convert TaskFiles to File format and merge with backend results - const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { - return { - filename: taskFile.filename, - mimetype: taskFile.mimetype, - source_url: taskFile.source_url, - size: taskFile.size, - connector_type: taskFile.connector_type, - status: taskFile.status, - }; - }); + // Convert TaskFiles to File format and merge with backend results + const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { + return { + filename: taskFile.filename, + mimetype: taskFile.mimetype, + source_url: taskFile.source_url, + size: taskFile.size, + connector_type: taskFile.connector_type, + status: taskFile.status, + }; + }); - const backendFiles = data as File[]; + const backendFiles = data as File[]; - const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { - return ( - taskFile.status !== "active" && - !backendFiles.some( - (backendFile) => backendFile.filename === taskFile.filename - ) - ); - }); + const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { + return ( + taskFile.status !== "active" && + !backendFiles.some( + (backendFile) => backendFile.filename === taskFile.filename, + ) + ); + }); - // Combine task files first, then backend files - const fileResults = [...backendFiles, ...filteredTaskFiles]; + // Combine task files first, then backend files + const fileResults = [...backendFiles, ...filteredTaskFiles]; - const gridRef = useRef(null); + const gridRef = useRef(null); - const [columnDefs] = useState[]>([ - { - field: "filename", - headerName: "Source", - checkboxSelection: true, - headerCheckboxSelection: true, - initialFlex: 2, - minWidth: 220, - cellRenderer: ({ data, value }: CustomCellRendererProps) => { - return ( - - ); - }, - }, - { - field: "size", - headerName: "Size", - valueFormatter: (params) => - params.value ? `${Math.round(params.value / 1024)} KB` : "-", - }, - { - field: "mimetype", - headerName: "Type", - }, - { - field: "owner", - headerName: "Owner", - valueFormatter: (params) => - params.data?.owner_name || params.data?.owner_email || "—", - }, - { - field: "chunkCount", - headerName: "Chunks", - valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", - }, - { - field: "avgScore", - headerName: "Avg score", - initialFlex: 0.5, - cellRenderer: ({ value }: CustomCellRendererProps) => { - return ( - - {value?.toFixed(2) ?? 
"-"} - - ); - }, - }, - { - field: "status", - headerName: "Status", - cellRenderer: ({ data }: CustomCellRendererProps) => { - // Default to 'active' status if no status is provided - const status = data?.status || "active"; - return ; - }, - }, - { - cellRenderer: ({ data }: CustomCellRendererProps) => { - return ; - }, - cellStyle: { - alignItems: "center", - display: "flex", - justifyContent: "center", - padding: 0, - }, - colId: "actions", - filter: false, - minWidth: 0, - width: 40, - resizable: false, - sortable: false, - initialFlex: 0, - }, - ]); + const [columnDefs] = useState[]>([ + { + field: "filename", + headerName: "Source", + checkboxSelection: (data) => (data?.data?.status || "active") === "active", + headerCheckboxSelection: true, + initialFlex: 2, + minWidth: 220, + cellRenderer: ({ data, value }: CustomCellRendererProps) => { + return ( +
{((data?.status || "active") !== "active") && +
+ } +
+ ); + }, + }, + { + field: "size", + headerName: "Size", + valueFormatter: (params) => + params.value ? `${Math.round(params.value / 1024)} KB` : "-", + }, + { + field: "mimetype", + headerName: "Type", + }, + { + field: "owner", + headerName: "Owner", + valueFormatter: (params) => + params.data?.owner_name || params.data?.owner_email || "—", + }, + { + field: "chunkCount", + headerName: "Chunks", + valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", + }, + { + field: "avgScore", + headerName: "Avg score", + initialFlex: 0.5, + cellRenderer: ({ value }: CustomCellRendererProps) => { + return ( + + {value?.toFixed(2) ?? "-"} + + ); + }, + }, + { + field: "status", + headerName: "Status", + cellRenderer: ({ data }: CustomCellRendererProps) => { + // Default to 'active' status if no status is provided + const status = data?.status || "active"; + return ; + }, + }, + { + cellRenderer: ({ data }: CustomCellRendererProps) => { + const status = data?.status || "active"; + if (status !== "active") { + return null; + } + return ; + }, + cellStyle: { + alignItems: "center", + display: "flex", + justifyContent: "center", + padding: 0, + }, + colId: "actions", + filter: false, + minWidth: 0, + width: 40, + resizable: false, + sortable: false, + initialFlex: 0, + }, + ]); - const defaultColDef: ColDef = { - resizable: false, - suppressMovable: true, - initialFlex: 1, - minWidth: 100, - }; + const defaultColDef: ColDef = { + resizable: false, + suppressMovable: true, + initialFlex: 1, + minWidth: 100, + }; - const onSelectionChanged = useCallback(() => { - if (gridRef.current) { - const selectedNodes = gridRef.current.api.getSelectedRows(); - setSelectedRows(selectedNodes); - } - }, []); + const onSelectionChanged = useCallback(() => { + if (gridRef.current) { + const selectedNodes = gridRef.current.api.getSelectedRows(); + setSelectedRows(selectedNodes); + } + }, []); - const handleBulkDelete = async () => { - if (selectedRows.length === 0) return; + const handleBulkDelete = async () => { + if (selectedRows.length === 0) return; - try { - // Delete each file individually since the API expects one filename at a time - const deletePromises = selectedRows.map((row) => - deleteDocumentMutation.mutateAsync({ filename: row.filename }) - ); + try { + // Delete each file individually since the API expects one filename at a time + const deletePromises = selectedRows.map((row) => + deleteDocumentMutation.mutateAsync({ filename: row.filename }), + ); - await Promise.all(deletePromises); + await Promise.all(deletePromises); - toast.success( - `Successfully deleted ${selectedRows.length} document${ - selectedRows.length > 1 ? "s" : "" - }` - ); - setSelectedRows([]); - setShowBulkDeleteDialog(false); + toast.success( + `Successfully deleted ${selectedRows.length} document${ + selectedRows.length > 1 ? "s" : "" + }`, + ); + setSelectedRows([]); + setShowBulkDeleteDialog(false); - // Clear selection in the grid - if (gridRef.current) { - gridRef.current.api.deselectAll(); - } - } catch (error) { - toast.error( - error instanceof Error - ? error.message - : "Failed to delete some documents" - ); - } - }; + // Clear selection in the grid + if (gridRef.current) { + gridRef.current.api.deselectAll(); + } + } catch (error) { + toast.error( + error instanceof Error + ? error.message + : "Failed to delete some documents", + ); + } + }; - return ( -
-
-
-

Project Knowledge

- -
+ return ( +
+
+
+

Project Knowledge

+ +
- {/* Search Input Area */} -
-
-
- {selectedFilter?.name && ( -
- {selectedFilter?.name} - setSelectedFilter(null)} - /> -
- )} - - -
- {/* */} - {/* //TODO: Implement sync button */} - {/* */} - {selectedRows.length > 0 && ( - - )} -
-
- params.data.filename} - domLayout="normal" - onSelectionChanged={onSelectionChanged} - noRowsOverlayComponent={() => ( -
-
- No knowledge -
-
- Add files from local or your preferred cloud. -
-
- )} - /> -
+ {selectedRows.length > 0 && ( + + )} + +
+ params.data.filename} + domLayout="normal" + onSelectionChanged={onSelectionChanged} + noRowsOverlayComponent={() => ( +
+
+ No knowledge +
+
+ Add files from local or your preferred cloud. +
+
+ )} + /> +
- {/* Bulk Delete Confirmation Dialog */} - 1 ? "s" : "" - }? This will remove all chunks and data associated with these documents. This action cannot be undone. + {/* Bulk Delete Confirmation Dialog */} + 1 ? "s" : "" + }? This will remove all chunks and data associated with these documents. This action cannot be undone. Documents to be deleted: ${selectedRows.map((row) => `• ${row.filename}`).join("\n")}`} - confirmText="Delete All" - onConfirm={handleBulkDelete} - isLoading={deleteDocumentMutation.isPending} - /> -
- ); + confirmText="Delete All" + onConfirm={handleBulkDelete} + isLoading={deleteDocumentMutation.isPending} + /> + + ); } export default function ProtectedSearchPage() { - return ( - - - - ); + return ( + + + + ); } From 581879f5f6ffc5807096f8d93c481e13c4787ee3 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Thu, 2 Oct 2025 18:04:45 -0300 Subject: [PATCH 05/23] fixed animated processing icon --- .../ui/animated-processing-icon.tsx | 52 +++++++------------ frontend/src/components/ui/status-badge.tsx | 2 +- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/frontend/src/components/ui/animated-processing-icon.tsx b/frontend/src/components/ui/animated-processing-icon.tsx index eb36b2ab..51815414 100644 --- a/frontend/src/components/ui/animated-processing-icon.tsx +++ b/frontend/src/components/ui/animated-processing-icon.tsx @@ -1,26 +1,16 @@ -interface AnimatedProcessingIconProps { - className?: string; - size?: number; -} +import type { SVGProps } from "react"; -export const AnimatedProcessingIcon = ({ - className = "", - size = 10, -}: AnimatedProcessingIconProps) => { - const width = Math.round((size * 6) / 10); - const height = size; - - return ( - - + + + + + + + ); }; diff --git a/frontend/src/components/ui/status-badge.tsx b/frontend/src/components/ui/status-badge.tsx index d3b1a323..07dce58b 100644 --- a/frontend/src/components/ui/status-badge.tsx +++ b/frontend/src/components/ui/status-badge.tsx @@ -50,7 +50,7 @@ export const StatusBadge = ({ status, className }: StatusBadgeProps) => { }`} > {status === "processing" && ( - + )} {config.label} From 7d6feef9992b6bfeaf78db085a961dd5c4e72c4e Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Thu, 2 Oct 2025 18:09:02 -0300 Subject: [PATCH 06/23] fixed status badge size --- frontend/src/components/ui/status-badge.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/ui/status-badge.tsx b/frontend/src/components/ui/status-badge.tsx index 07dce58b..e57ad3b5 100644 --- a/frontend/src/components/ui/status-badge.tsx +++ b/frontend/src/components/ui/status-badge.tsx @@ -50,7 +50,7 @@ export const StatusBadge = ({ status, className }: StatusBadgeProps) => { }`} > {status === "processing" && ( - + )} {config.label} From 179a7403ccaec179e2d0cad3e88c29a3deca08f1 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Thu, 2 Oct 2025 18:09:18 -0300 Subject: [PATCH 07/23] fixed router, langflow files and processors to not use temp names --- src/api/langflow_files.py | 15 ++++++++------- src/api/router.py | 18 ++++++++++-------- src/models/processors.py | 8 ++------ 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/api/langflow_files.py b/src/api/langflow_files.py index 1c83724d..ae36bf04 100644 --- a/src/api/langflow_files.py +++ b/src/api/langflow_files.py @@ -189,19 +189,20 @@ async def upload_and_ingest_user_file( # Create temporary file for task processing import tempfile import os - + # Read file content content = await upload_file.read() - - # Create temporary file + + # Create temporary file with the actual filename (not a temp prefix) + # Store in temp directory but use the real filename + temp_dir = tempfile.gettempdir() safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") - temp_fd, temp_path = tempfile.mkstemp( - suffix=f"_{safe_filename}" - ) + temp_path = os.path.join(temp_dir, safe_filename) + try: # Write content to temp file - with os.fdopen(temp_fd, 'wb') as temp_file: + with open(temp_path, 'wb') as temp_file: temp_file.write(content) 
logger.debug("Created temporary file for task processing", temp_path=temp_path) diff --git a/src/api/router.py b/src/api/router.py index 56789d41..857914c0 100644 --- a/src/api/router.py +++ b/src/api/router.py @@ -114,20 +114,22 @@ async def langflow_upload_ingest_task( temp_file_paths = [] try: + # Create temp directory reference once + temp_dir = tempfile.gettempdir() + for upload_file in upload_files: # Read file content content = await upload_file.read() - - # Create temporary file + + # Create temporary file with the actual filename (not a temp prefix) + # Store in temp directory but use the real filename safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") - temp_fd, temp_path = tempfile.mkstemp( - suffix=f"_{safe_filename}" - ) - + temp_path = os.path.join(temp_dir, safe_filename) + # Write content to temp file - with os.fdopen(temp_fd, 'wb') as temp_file: + with open(temp_path, 'wb') as temp_file: temp_file.write(content) - + temp_file_paths.append(temp_path) logger.debug( diff --git a/src/models/processors.py b/src/models/processors.py index ecec9c49..0ce8e33d 100644 --- a/src/models/processors.py +++ b/src/models/processors.py @@ -574,12 +574,8 @@ class LangflowFileProcessor(TaskProcessor): content = f.read() # Create file tuple for upload - temp_filename = os.path.basename(item) - # Extract original filename from temp file suffix (remove tmp prefix) - if "_" in temp_filename: - filename = temp_filename.split("_", 1)[1] # Get everything after first _ - else: - filename = temp_filename + # The temp file now has the actual filename, no need to extract it + filename = os.path.basename(item) content_type, _ = mimetypes.guess_type(filename) if not content_type: content_type = 'application/octet-stream' From b28831ec25351b3cdd4cb6fc8621a2963a984b59 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Fri, 3 Oct 2025 10:11:39 -0300 Subject: [PATCH 08/23] fixed issues with already ingested files, fixed state issues --- frontend/src/app/knowledge/page.tsx | 11 +++++------ frontend/src/contexts/task-context.tsx | 9 +++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx index 41485289..c6d254c4 100644 --- a/frontend/src/app/knowledge/page.tsx +++ b/frontend/src/app/knowledge/page.tsx @@ -75,14 +75,13 @@ function SearchPage() { }; }); - const backendFiles = data as File[]; + const backendFiles = (data as File[]).filter((file) => !taskFilesAsFiles.some((taskFile) => taskFile.filename === file.filename && taskFile.status === "processing")); const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { return ( taskFile.status !== "active" && !backendFiles.some( - (backendFile) => backendFile.filename === taskFile.filename, - ) + (backendFile) => backendFile.filename === taskFile.filename,) ); }); @@ -91,7 +90,7 @@ function SearchPage() { const gridRef = useRef(null); - const [columnDefs] = useState[]>([ + const columnDefs = [ { field: "filename", headerName: "Source", @@ -189,8 +188,8 @@ function SearchPage() { resizable: false, sortable: false, initialFlex: 0, - }, - ]); + } + ]; const defaultColDef: ColDef = { resizable: false, diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index b3275422..8928ae8f 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -212,11 +212,12 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { onClick: () => console.log("View task", 
currentTask.task_id), }, }); + setTimeout(() => { refetchSearch(); - // Remove files for this completed task from the files list - // setFiles((prevFiles) => - // prevFiles.filter((file) => file.task_id !== currentTask.task_id), - // ); + setFiles((prevFiles) => + prevFiles.filter((file) => file.task_id !== currentTask.task_id && file.status !== "failed"), + ); + }, 500); } else if ( previousTask && previousTask.status !== "failed" && previousTask.status !== "error" && From 589839fedc1199e6060e0b57e9bd9ee08d8081b1 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Fri, 3 Oct 2025 15:34:54 -0300 Subject: [PATCH 09/23] implement duplicate handling on backend and frontend --- .../components/duplicate-handling-dialog.tsx | 71 ++++++ frontend/components/knowledge-dropdown.tsx | 233 ++++++++++++------ frontend/src/contexts/task-context.tsx | 3 +- src/api/documents.py | 92 +++++-- src/api/router.py | 13 +- src/main.py | 11 + src/models/processors.py | 139 +++++++++-- src/models/tasks.py | 3 +- src/services/task_service.py | 23 +- 9 files changed, 470 insertions(+), 118 deletions(-) create mode 100644 frontend/components/duplicate-handling-dialog.tsx diff --git a/frontend/components/duplicate-handling-dialog.tsx b/frontend/components/duplicate-handling-dialog.tsx new file mode 100644 index 00000000..2f92ea50 --- /dev/null +++ b/frontend/components/duplicate-handling-dialog.tsx @@ -0,0 +1,71 @@ +"use client"; + +import { RotateCcw } from "lucide-react"; +import type React from "react"; +import { Button } from "./ui/button"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from "./ui/dialog"; + +interface DuplicateHandlingDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; + filename: string; + onOverwrite: () => void | Promise; + isLoading?: boolean; +} + +export const DuplicateHandlingDialog: React.FC< + DuplicateHandlingDialogProps +> = ({ open, onOpenChange, filename, onOverwrite, isLoading = false }) => { + const handleOverwrite = async () => { + try { + await onOverwrite(); + onOpenChange(false); + } catch (error) { + // Error handling is done by the parent component + } + }; + + return ( + + + + Overwrite document + + Overwriting will replace the existing document with another version. + This can't be undone. 
+ + + + + + + + + + ); +}; diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx index 9b71ee81..0b106360 100644 --- a/frontend/components/knowledge-dropdown.tsx +++ b/frontend/components/knowledge-dropdown.tsx @@ -13,6 +13,7 @@ import { useRouter } from "next/navigation"; import { useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { useGetTasksQuery } from "@/app/api/queries/useGetTasksQuery"; +import { DuplicateHandlingDialog } from "@/components/duplicate-handling-dialog"; import { Button } from "@/components/ui/button"; import { Dialog, @@ -41,6 +42,7 @@ export function KnowledgeDropdown({ const [isOpen, setIsOpen] = useState(false); const [showFolderDialog, setShowFolderDialog] = useState(false); const [showS3Dialog, setShowS3Dialog] = useState(false); + const [showDuplicateDialog, setShowDuplicateDialog] = useState(false); const [awsEnabled, setAwsEnabled] = useState(false); const [folderPath, setFolderPath] = useState("/app/documents/"); const [bucketUrl, setBucketUrl] = useState("s3://"); @@ -48,6 +50,8 @@ export function KnowledgeDropdown({ const [s3Loading, setS3Loading] = useState(false); const [fileUploading, setFileUploading] = useState(false); const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false); + const [pendingFile, setPendingFile] = useState(null); + const [duplicateFilename, setDuplicateFilename] = useState(""); const [cloudConnectors, setCloudConnectors] = useState<{ [key: string]: { name: string; @@ -168,101 +172,52 @@ export function KnowledgeDropdown({ const handleFileChange = async (e: React.ChangeEvent) => { const files = e.target.files; if (files && files.length > 0) { - // Close dropdown and disable button immediately after file selection - setIsOpen(false); - setFileUploading(true); + const file = files[0]; - // Trigger the same file upload event as the chat page - window.dispatchEvent( - new CustomEvent("fileUploadStart", { - detail: { filename: files[0].name }, - }), - ); + // Close dropdown immediately after file selection + setIsOpen(false); try { - const formData = new FormData(); - formData.append("file", files[0]); + // Check if filename already exists (using ORIGINAL filename) + console.log("[Duplicate Check] Checking file:", file.name); + const checkResponse = await fetch( + `/api/documents/check-filename?filename=${encodeURIComponent(file.name)}`, + ); - // Use router upload and ingest endpoint (automatically routes based on configuration) - const uploadIngestRes = await fetch("/api/router/upload_ingest", { - method: "POST", - body: formData, - }); + console.log("[Duplicate Check] Response status:", checkResponse.status); - const uploadIngestJson = await uploadIngestRes.json(); - - if (!uploadIngestRes.ok) { + if (!checkResponse.ok) { + const errorText = await checkResponse.text(); + console.error("[Duplicate Check] Error response:", errorText); throw new Error( - uploadIngestJson?.error || "Upload and ingest failed", + `Failed to check duplicates: ${checkResponse.statusText}`, ); } - // Extract results from the response - handle both unified and simple formats - const fileId = uploadIngestJson?.upload?.id || uploadIngestJson?.id || uploadIngestJson?.task_id; - const filePath = - uploadIngestJson?.upload?.path || - uploadIngestJson?.path || - "uploaded"; - const runJson = uploadIngestJson?.ingestion; - const deleteResult = uploadIngestJson?.deletion; - console.log("c", uploadIngestJson ) - if (!fileId) { - throw new Error("Upload successful but no file id 
returned"); - } - // Check if ingestion actually succeeded - if ( - runJson && - runJson.status !== "COMPLETED" && - runJson.status !== "SUCCESS" - ) { - const errorMsg = runJson.error || "Ingestion pipeline failed"; - throw new Error( - `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.`, - ); - } - // Log deletion status if provided - if (deleteResult) { - if (deleteResult.status === "deleted") { - console.log( - "File successfully cleaned up from Langflow:", - deleteResult.file_id, - ); - } else if (deleteResult.status === "delete_failed") { - console.warn( - "Failed to cleanup file from Langflow:", - deleteResult.error, - ); + const checkData = await checkResponse.json(); + console.log("[Duplicate Check] Result:", checkData); + + if (checkData.exists) { + // Show duplicate handling dialog + console.log("[Duplicate Check] Duplicate detected, showing dialog"); + setPendingFile(file); + setDuplicateFilename(file.name); + setShowDuplicateDialog(true); + // Reset file input + if (fileInputRef.current) { + fileInputRef.current.value = ""; } + return; } - // Notify UI - window.dispatchEvent( - new CustomEvent("fileUploaded", { - detail: { - file: files[0], - result: { - file_id: fileId, - file_path: filePath, - run: runJson, - deletion: deleteResult, - unified: true, - }, - }, - }), - ); - refetchTasks(); + // No duplicate, proceed with upload + console.log("[Duplicate Check] No duplicate, proceeding with upload"); + await uploadFile(file, false); } catch (error) { - window.dispatchEvent( - new CustomEvent("fileUploadError", { - detail: { - filename: files[0].name, - error: error instanceof Error ? error.message : "Upload failed", - }, - }), - ); - } finally { - window.dispatchEvent(new CustomEvent("fileUploadComplete")); - setFileUploading(false); + console.error("[Duplicate Check] Exception:", error); + toast.error("Failed to check for duplicates", { + description: error instanceof Error ? 
error.message : "Unknown error", + }); } } @@ -272,6 +227,111 @@ export function KnowledgeDropdown({ } }; + const uploadFile = async (file: File, replace: boolean) => { + setFileUploading(true); + + // Trigger the same file upload event as the chat page + window.dispatchEvent( + new CustomEvent("fileUploadStart", { + detail: { filename: file.name }, + }), + ); + + try { + const formData = new FormData(); + formData.append("file", file); + formData.append("replace_duplicates", replace.toString()); + + // Use router upload and ingest endpoint (automatically routes based on configuration) + const uploadIngestRes = await fetch("/api/router/upload_ingest", { + method: "POST", + body: formData, + }); + + const uploadIngestJson = await uploadIngestRes.json(); + + if (!uploadIngestRes.ok) { + throw new Error(uploadIngestJson?.error || "Upload and ingest failed"); + } + + // Extract results from the response - handle both unified and simple formats + const fileId = + uploadIngestJson?.upload?.id || + uploadIngestJson?.id || + uploadIngestJson?.task_id; + const filePath = + uploadIngestJson?.upload?.path || uploadIngestJson?.path || "uploaded"; + const runJson = uploadIngestJson?.ingestion; + const deleteResult = uploadIngestJson?.deletion; + console.log("c", uploadIngestJson); + if (!fileId) { + throw new Error("Upload successful but no file id returned"); + } + // Check if ingestion actually succeeded + if ( + runJson && + runJson.status !== "COMPLETED" && + runJson.status !== "SUCCESS" + ) { + const errorMsg = runJson.error || "Ingestion pipeline failed"; + throw new Error( + `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.`, + ); + } + // Log deletion status if provided + if (deleteResult) { + if (deleteResult.status === "deleted") { + console.log( + "File successfully cleaned up from Langflow:", + deleteResult.file_id, + ); + } else if (deleteResult.status === "delete_failed") { + console.warn( + "Failed to cleanup file from Langflow:", + deleteResult.error, + ); + } + } + // Notify UI + window.dispatchEvent( + new CustomEvent("fileUploaded", { + detail: { + file: file, + result: { + file_id: fileId, + file_path: filePath, + run: runJson, + deletion: deleteResult, + unified: true, + }, + }, + }), + ); + + refetchTasks(); + } catch (error) { + window.dispatchEvent( + new CustomEvent("fileUploadError", { + detail: { + filename: file.name, + error: error instanceof Error ? 
error.message : "Upload failed", + }, + }), + ); + } finally { + window.dispatchEvent(new CustomEvent("fileUploadComplete")); + setFileUploading(false); + } + }; + + const handleOverwriteFile = async () => { + if (pendingFile) { + await uploadFile(pendingFile, true); + setPendingFile(null); + setDuplicateFilename(""); + } + }; + const handleFolderUpload = async () => { if (!folderPath.trim()) return; @@ -611,6 +671,15 @@ export function KnowledgeDropdown({ + + {/* Duplicate Handling Dialog */} + ); } diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index 8928ae8f..26e8ca00 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -135,7 +135,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { taskFileEntries.forEach(([filePath, fileInfo]) => { if (typeof fileInfo === "object" && fileInfo) { - const fileName = filePath.split("/").pop() || filePath; + // Use the filename from backend if available, otherwise extract from path + const fileName = (fileInfo as any).filename || filePath.split("/").pop() || filePath; const fileStatus = fileInfo.status as string; // Map backend file status to our TaskFile status diff --git a/src/api/documents.py b/src/api/documents.py index 82afb349..a367c9fe 100644 --- a/src/api/documents.py +++ b/src/api/documents.py @@ -6,14 +6,13 @@ from config.settings import INDEX_NAME logger = get_logger(__name__) -async def delete_documents_by_filename(request: Request, document_service, session_manager): - """Delete all documents with a specific filename""" - data = await request.json() - filename = data.get("filename") - +async def check_filename_exists(request: Request, document_service, session_manager): + """Check if a document with a specific filename already exists""" + filename = request.query_params.get("filename") + if not filename: - return JSONResponse({"error": "filename is required"}, status_code=400) - + return JSONResponse({"error": "filename parameter is required"}, status_code=400) + user = request.state.user jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token) @@ -22,34 +21,99 @@ async def delete_documents_by_filename(request: Request, document_service, sessi opensearch_client = session_manager.get_user_opensearch_client( user.user_id, jwt_token ) - + + # Search for any document with this exact filename + # Try both .keyword (exact match) and regular field (analyzed match) + search_body = { + "query": { + "bool": { + "should": [ + {"term": {"filename.keyword": filename}}, + {"term": {"filename": filename}} + ], + "minimum_should_match": 1 + } + }, + "size": 1, + "_source": ["filename"] + } + + logger.debug(f"Checking filename existence: {filename}") + + response = await opensearch_client.search( + index=INDEX_NAME, + body=search_body + ) + + # Check if any hits were found + hits = response.get("hits", {}).get("hits", []) + exists = len(hits) > 0 + + logger.debug(f"Filename check result - exists: {exists}, hits: {len(hits)}") + + return JSONResponse({ + "exists": exists, + "filename": filename + }, status_code=200) + + except Exception as e: + logger.error("Error checking filename existence", filename=filename, error=str(e)) + error_str = str(e) + if "AuthenticationException" in error_str: + return JSONResponse({"error": "Access denied: insufficient permissions"}, status_code=403) + else: + return JSONResponse({"error": str(e)}, status_code=500) + + +async def delete_documents_by_filename(request: Request, 
document_service, session_manager): + """Delete all documents with a specific filename""" + data = await request.json() + filename = data.get("filename") + + if not filename: + return JSONResponse({"error": "filename is required"}, status_code=400) + + user = request.state.user + jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token) + + try: + # Get user's OpenSearch client + opensearch_client = session_manager.get_user_opensearch_client( + user.user_id, jwt_token + ) + # Delete by query to remove all chunks of this document + # Use both .keyword and regular field to ensure we catch all variations delete_query = { "query": { "bool": { - "must": [ + "should": [ + {"term": {"filename.keyword": filename}}, {"term": {"filename": filename}} - ] + ], + "minimum_should_match": 1 } } } - + + logger.debug(f"Deleting documents with filename: {filename}") + result = await opensearch_client.delete_by_query( index=INDEX_NAME, body=delete_query, conflicts="proceed" ) - + deleted_count = result.get("deleted", 0) logger.info(f"Deleted {deleted_count} chunks for filename {filename}", user_id=user.user_id) - + return JSONResponse({ "success": True, "deleted_chunks": deleted_count, "filename": filename, "message": f"All documents with filename '{filename}' deleted successfully" }, status_code=200) - + except Exception as e: logger.error("Error deleting documents by filename", filename=filename, error=str(e)) error_str = str(e) diff --git a/src/api/router.py b/src/api/router.py index 23ce5bdf..327757be 100644 --- a/src/api/router.py +++ b/src/api/router.py @@ -77,6 +77,7 @@ async def langflow_upload_ingest_task( settings_json = form.get("settings") tweaks_json = form.get("tweaks") delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true" + replace_duplicates = form.get("replace_duplicates", "false").lower() == "true" # Parse JSON fields if provided settings = None @@ -112,7 +113,8 @@ async def langflow_upload_ingest_task( import tempfile import os temp_file_paths = [] - + original_filenames = [] + try: # Create temp directory reference once temp_dir = tempfile.gettempdir() @@ -121,8 +123,11 @@ async def langflow_upload_ingest_task( # Read file content content = await upload_file.read() - # Create temporary file with the actual filename (not a temp prefix) - # Store in temp directory but use the real filename + # Store ORIGINAL filename (not transformed) + original_filenames.append(upload_file.filename) + + # Create temporary file with TRANSFORMED filename for filesystem safety + # Transform: spaces and / to underscore safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") temp_path = os.path.join(temp_dir, safe_filename) @@ -153,6 +158,7 @@ async def langflow_upload_ingest_task( task_id = await task_service.create_langflow_upload_task( user_id=user_id, file_paths=temp_file_paths, + original_filenames=original_filenames, langflow_file_service=langflow_file_service, session_manager=session_manager, jwt_token=jwt_token, @@ -162,6 +168,7 @@ async def langflow_upload_ingest_task( tweaks=tweaks, settings=settings, delete_after_ingest=delete_after_ingest, + replace_duplicates=replace_duplicates, ) logger.debug("Langflow upload task created successfully", task_id=task_id) diff --git a/src/main.py b/src/main.py index 230ded79..bf6da342 100644 --- a/src/main.py +++ b/src/main.py @@ -953,6 +953,17 @@ async def create_app(): methods=["POST", "GET"], ), # Document endpoints + Route( + "/documents/check-filename", + 
require_auth(services["session_manager"])( + partial( + documents.check_filename_exists, + document_service=services["document_service"], + session_manager=services["session_manager"], + ) + ), + methods=["GET"], + ), Route( "/documents/delete-by-filename", require_auth(services["session_manager"])( diff --git a/src/models/processors.py b/src/models/processors.py index a1d72777..6d7b74b4 100644 --- a/src/models/processors.py +++ b/src/models/processors.py @@ -55,6 +55,108 @@ class TaskProcessor: await asyncio.sleep(retry_delay) retry_delay *= 2 # Exponential backoff + async def check_filename_exists( + self, + filename: str, + opensearch_client, + ) -> bool: + """ + Check if a document with the given filename already exists in OpenSearch. + Returns True if any chunks with this filename exist. + """ + from config.settings import INDEX_NAME + import asyncio + + max_retries = 3 + retry_delay = 1.0 + + for attempt in range(max_retries): + try: + # Search for any document with this exact filename + search_body = { + "query": { + "term": { + "filename.keyword": filename + } + }, + "size": 1, + "_source": False + } + + response = await opensearch_client.search( + index=INDEX_NAME, + body=search_body + ) + + # Check if any hits were found + hits = response.get("hits", {}).get("hits", []) + return len(hits) > 0 + + except (asyncio.TimeoutError, Exception) as e: + if attempt == max_retries - 1: + logger.error( + "OpenSearch filename check failed after retries", + filename=filename, + error=str(e), + attempt=attempt + 1 + ) + # On final failure, assume document doesn't exist (safer to reprocess than skip) + logger.warning( + "Assuming filename doesn't exist due to connection issues", + filename=filename + ) + return False + else: + logger.warning( + "OpenSearch filename check failed, retrying", + filename=filename, + error=str(e), + attempt=attempt + 1, + retry_in=retry_delay + ) + await asyncio.sleep(retry_delay) + retry_delay *= 2 # Exponential backoff + + async def delete_document_by_filename( + self, + filename: str, + opensearch_client, + ) -> None: + """ + Delete all chunks of a document with the given filename from OpenSearch. 
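+
+        Args:
+            filename: Exact filename whose chunks should be removed.
+            opensearch_client: OpenSearch client scoped to the requesting user.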
+ """ + from config.settings import INDEX_NAME + + try: + # Delete all documents with this filename + delete_body = { + "query": { + "term": { + "filename.keyword": filename + } + } + } + + response = await opensearch_client.delete_by_query( + index=INDEX_NAME, + body=delete_body + ) + + deleted_count = response.get("deleted", 0) + logger.info( + "Deleted existing document chunks", + filename=filename, + deleted_count=deleted_count + ) + + except Exception as e: + logger.error( + "Failed to delete existing document", + filename=filename, + error=str(e) + ) + raise + async def process_document_standard( self, file_path: str, @@ -527,6 +629,7 @@ class LangflowFileProcessor(TaskProcessor): tweaks: dict = None, settings: dict = None, delete_after_ingest: bool = True, + replace_duplicates: bool = False, ): super().__init__() self.langflow_file_service = langflow_file_service @@ -539,6 +642,7 @@ class LangflowFileProcessor(TaskProcessor): self.tweaks = tweaks or {} self.settings = settings self.delete_after_ingest = delete_after_ingest + self.replace_duplicates = replace_duplicates async def process_item( self, upload_task: UploadTask, item: str, file_task: FileTask @@ -554,33 +658,40 @@ class LangflowFileProcessor(TaskProcessor): file_task.updated_at = time.time() try: - # Compute hash and check if already exists - from utils.hash_utils import hash_id - file_hash = hash_id(item) + # Use the ORIGINAL filename stored in file_task (not the transformed temp path) + # This ensures we check/store the original filename with spaces, etc. + original_filename = file_task.filename or os.path.basename(item) - # Check if document already exists + # Check if document with same filename already exists opensearch_client = self.session_manager.get_user_opensearch_client( self.owner_user_id, self.jwt_token ) - if await self.check_document_exists(file_hash, opensearch_client): - file_task.status = TaskStatus.COMPLETED - file_task.result = {"status": "unchanged", "id": file_hash} + + filename_exists = await self.check_filename_exists(original_filename, opensearch_client) + + if filename_exists and not self.replace_duplicates: + # Duplicate exists and user hasn't confirmed replacement + file_task.status = TaskStatus.FAILED + file_task.error = f"File with name '{original_filename}' already exists" file_task.updated_at = time.time() - upload_task.successful_files += 1 + upload_task.failed_files += 1 return + elif filename_exists and self.replace_duplicates: + # Delete existing document before uploading new one + logger.info(f"Replacing existing document: {original_filename}") + await self.delete_document_by_filename(original_filename, opensearch_client) # Read file content for processing with open(item, 'rb') as f: content = f.read() - # Create file tuple for upload - # The temp file now has the actual filename, no need to extract it - filename = os.path.basename(item) - content_type, _ = mimetypes.guess_type(filename) + # Create file tuple for upload using ORIGINAL filename + # This ensures the document is indexed with the original name + content_type, _ = mimetypes.guess_type(original_filename) if not content_type: content_type = 'application/octet-stream' - - file_tuple = (filename, content, content_type) + + file_tuple = (original_filename, content, content_type) # Get JWT token using same logic as DocumentFileProcessor # This will handle anonymous JWT creation if needed diff --git a/src/models/tasks.py b/src/models/tasks.py index 236927ab..253cabb5 100644 --- a/src/models/tasks.py +++ b/src/models/tasks.py @@ 
-20,7 +20,8 @@ class FileTask: retry_count: int = 0 created_at: float = field(default_factory=time.time) updated_at: float = field(default_factory=time.time) - + filename: Optional[str] = None # Original filename for display + @property def duration_seconds(self) -> float: """Duration in seconds from creation to last update""" diff --git a/src/services/task_service.py b/src/services/task_service.py index be5312a0..eb5825c0 100644 --- a/src/services/task_service.py +++ b/src/services/task_service.py @@ -59,6 +59,7 @@ class TaskService: file_paths: list, langflow_file_service, session_manager, + original_filenames: list = None, jwt_token: str = None, owner_name: str = None, owner_email: str = None, @@ -66,6 +67,7 @@ class TaskService: tweaks: dict = None, settings: dict = None, delete_after_ingest: bool = True, + replace_duplicates: bool = False, ) -> str: """Create a new upload task for Langflow file processing with upload and ingest""" # Use LangflowFileProcessor with user context @@ -82,18 +84,31 @@ class TaskService: tweaks=tweaks, settings=settings, delete_after_ingest=delete_after_ingest, + replace_duplicates=replace_duplicates, ) - return await self.create_custom_task(user_id, file_paths, processor) + return await self.create_custom_task(user_id, file_paths, processor, original_filenames) - async def create_custom_task(self, user_id: str, items: list, processor) -> str: + async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: list = None) -> str: """Create a new task with custom processor for any type of items""" + import os # Store anonymous tasks under a stable key so they can be retrieved later store_user_id = user_id or AnonymousUser().user_id task_id = str(uuid.uuid4()) + + # Create file tasks with original filenames if provided + file_tasks = {} + for i, item in enumerate(items): + if original_filenames and i < len(original_filenames): + filename = original_filenames[i] + else: + filename = os.path.basename(str(item)) + + file_tasks[str(item)] = FileTask(file_path=str(item), filename=filename) + upload_task = UploadTask( task_id=task_id, total_files=len(items), - file_tasks={str(item): FileTask(file_path=str(item)) for item in items}, + file_tasks=file_tasks, ) # Attach the custom processor to the task @@ -268,6 +283,7 @@ class TaskService: "created_at": file_task.created_at, "updated_at": file_task.updated_at, "duration_seconds": file_task.duration_seconds, + "filename": file_task.filename, } # Count running and pending files @@ -322,6 +338,7 @@ class TaskService: "created_at": file_task.created_at, "updated_at": file_task.updated_at, "duration_seconds": file_task.duration_seconds, + "filename": file_task.filename, } if file_task.status.value == "running": From fb29f72598aad139e4c0623d861094a18e7b8fba Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Fri, 3 Oct 2025 15:39:51 -0300 Subject: [PATCH 10/23] update task toast --- frontend/src/contexts/task-context.tsx | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index 26e8ca00..d25130f7 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -205,9 +205,19 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { previousTask && previousTask.status !== "completed" && currentTask.status === "completed" ) { - // Task just completed - show success toast - toast.success("Task completed successfully", { - description: 
`Task ${currentTask.task_id} has finished processing.`, + // Task just completed - show success toast with file counts + const successfulFiles = currentTask.successful_files || 0; + const failedFiles = currentTask.failed_files || 0; + + let description = ""; + if (failedFiles > 0) { + description = `${successfulFiles} file${successfulFiles !== 1 ? 's' : ''} uploaded successfully, ${failedFiles} file${failedFiles !== 1 ? 's' : ''} failed`; + } else { + description = `${successfulFiles} file${successfulFiles !== 1 ? 's' : ''} uploaded successfully`; + } + + toast.success("Task completed", { + description, action: { label: "View", onClick: () => console.log("View task", currentTask.task_id), From 9336aa287e87032bd7385e3824e548df5de26f0e Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Fri, 3 Oct 2025 16:12:09 -0300 Subject: [PATCH 11/23] fixed lint --- frontend/src/app/knowledge/page.tsx | 110 ++++++++++++++++++---------- 1 file changed, 71 insertions(+), 39 deletions(-) diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx index c6d254c4..9e933903 100644 --- a/frontend/src/app/knowledge/page.tsx +++ b/frontend/src/app/knowledge/page.tsx @@ -1,10 +1,15 @@ "use client"; -import type { ColDef } from "ag-grid-community"; +import type { ColDef, GetRowIdParams } from "ag-grid-community"; import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react"; import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react"; import { useRouter } from "next/navigation"; -import { type ChangeEvent, useCallback, useRef, useState } from "react"; +import { + type ChangeEvent, + useCallback, + useRef, + useState, +} from "react"; import { SiGoogledrive } from "react-icons/si"; import { TbBrandOnedrive } from "react-icons/tb"; import { KnowledgeDropdown } from "@/components/knowledge-dropdown"; @@ -54,15 +59,10 @@ function SearchPage() { const deleteDocumentMutation = useDeleteDocument(); - const { data = [], isFetching } = useGetSearchQuery( + const { data: searchData = [], isFetching } = useGetSearchQuery( parsedFilterData?.query || "*", parsedFilterData, ); - - const handleTableSearch = (e: ChangeEvent) => { - gridRef.current?.api.setGridOption("quickFilterText", e.target.value); - }; - // Convert TaskFiles to File format and merge with backend results const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { return { @@ -75,60 +75,91 @@ function SearchPage() { }; }); - const backendFiles = (data as File[]).filter((file) => !taskFilesAsFiles.some((taskFile) => taskFile.filename === file.filename && taskFile.status === "processing")); + // Create a map of task files by filename for quick lookup + const taskFileMap = new Map( + taskFilesAsFiles.map((file) => [file.filename, file]), + ); + + // Override backend files with task file status if they exist + const backendFiles = (searchData as File[]) + .map((file) => { + const taskFile = taskFileMap.get(file.filename); + if (taskFile) { + // Override backend file with task file data (includes status) + return { ...file, ...taskFile }; + } + return file; + }) + .filter((file) => { + // Only filter out files that are currently processing AND in taskFiles + const taskFile = taskFileMap.get(file.filename); + return !taskFile || taskFile.status !== "processing"; + }); const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { return ( taskFile.status !== "active" && !backendFiles.some( - (backendFile) => backendFile.filename === taskFile.filename,) + (backendFile) => backendFile.filename === 
taskFile.filename, + ) ); }); // Combine task files first, then backend files const fileResults = [...backendFiles, ...filteredTaskFiles]; + const handleTableSearch = (e: ChangeEvent) => { + gridRef.current?.api.setGridOption("quickFilterText", e.target.value); + }; + const gridRef = useRef(null); const columnDefs = [ { field: "filename", headerName: "Source", - checkboxSelection: (data) => (data?.data?.status || "active") === "active", + checkboxSelection: (params: CustomCellRendererProps) => + (params?.data?.status || "active") === "active", headerCheckboxSelection: true, initialFlex: 2, minWidth: 220, cellRenderer: ({ data, value }: CustomCellRendererProps) => { + // Read status directly from data on each render + const status = data?.status || "active"; + const isActive = status === "active"; + console.log(data?.filename, status, "a"); return ( -
{((data?.status || "active") !== "active") && -
- } -
+
+
+ +
); }, }, { field: "size", headerName: "Size", - valueFormatter: (params) => + valueFormatter: (params: CustomCellRendererProps) => params.value ? `${Math.round(params.value / 1024)} KB` : "-", }, { @@ -138,13 +169,13 @@ function SearchPage() { { field: "owner", headerName: "Owner", - valueFormatter: (params) => + valueFormatter: (params: CustomCellRendererProps) => params.data?.owner_name || params.data?.owner_email || "—", }, { field: "chunkCount", headerName: "Chunks", - valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", + valueFormatter: (params: CustomCellRendererProps) => params.data?.chunkCount?.toString() || "-", }, { field: "avgScore", @@ -162,6 +193,7 @@ function SearchPage() { field: "status", headerName: "Status", cellRenderer: ({ data }: CustomCellRendererProps) => { + console.log(data?.filename, data?.status, "b"); // Default to 'active' status if no status is provided const status = data?.status || "active"; return ; @@ -188,7 +220,7 @@ function SearchPage() { resizable: false, sortable: false, initialFlex: 0, - } + }, ]; const defaultColDef: ColDef = { @@ -323,7 +355,7 @@ function SearchPage() { []} defaultColDef={defaultColDef} loading={isFetching} ref={gridRef} @@ -331,7 +363,7 @@ function SearchPage() { rowSelection="multiple" rowMultiSelectWithClick={false} suppressRowClickSelection={true} - getRowId={(params) => params.data.filename} + getRowId={(params: GetRowIdParams) => params.data?.filename} domLayout="normal" onSelectionChanged={onSelectionChanged} noRowsOverlayComponent={() => ( From ca9bee8222bd615501c90e8e7f01913243c3d08d Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Fri, 3 Oct 2025 16:12:18 -0300 Subject: [PATCH 12/23] removed file from query when overwriting --- frontend/components/knowledge-dropdown.tsx | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx index 0b106360..7fe84259 100644 --- a/frontend/components/knowledge-dropdown.tsx +++ b/frontend/components/knowledge-dropdown.tsx @@ -1,5 +1,6 @@ "use client"; +import { useQueryClient } from "@tanstack/react-query"; import { ChevronDown, Cloud, @@ -26,6 +27,7 @@ import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; import { useTask } from "@/contexts/task-context"; import { cn } from "@/lib/utils"; +import type { File as SearchFile } from "@/src/app/api/queries/useGetSearchQuery"; interface KnowledgeDropdownProps { active?: boolean; @@ -38,6 +40,7 @@ export function KnowledgeDropdown({ }: KnowledgeDropdownProps) { const { addTask } = useTask(); const { refetch: refetchTasks } = useGetTasksQuery(); + const queryClient = useQueryClient(); const router = useRouter(); const [isOpen, setIsOpen] = useState(false); const [showFolderDialog, setShowFolderDialog] = useState(false); @@ -326,6 +329,15 @@ export function KnowledgeDropdown({ const handleOverwriteFile = async () => { if (pendingFile) { + // Remove the old file from all search query caches before overwriting + queryClient.setQueriesData({ queryKey: ["search"] }, (oldData: []) => { + if (!oldData) return oldData; + // Filter out the file that's being overwritten + return oldData.filter( + (file: SearchFile) => file.filename !== pendingFile.name, + ); + }); + await uploadFile(pendingFile, true); setPendingFile(null); setDuplicateFilename(""); @@ -676,7 +688,6 @@ export function KnowledgeDropdown({ From 4de9a0d085848abbab76db05b9f06c9d3d81198f Mon Sep 17 00:00:00 2001 From: Lucas 
Oliveira Date: Fri, 3 Oct 2025 16:12:24 -0300 Subject: [PATCH 13/23] removed unused prop --- frontend/components/duplicate-handling-dialog.tsx | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/frontend/components/duplicate-handling-dialog.tsx b/frontend/components/duplicate-handling-dialog.tsx index 2f92ea50..d5cb2edf 100644 --- a/frontend/components/duplicate-handling-dialog.tsx +++ b/frontend/components/duplicate-handling-dialog.tsx @@ -15,21 +15,16 @@ import { interface DuplicateHandlingDialogProps { open: boolean; onOpenChange: (open: boolean) => void; - filename: string; onOverwrite: () => void | Promise; isLoading?: boolean; } export const DuplicateHandlingDialog: React.FC< DuplicateHandlingDialogProps -> = ({ open, onOpenChange, filename, onOverwrite, isLoading = false }) => { +> = ({ open, onOpenChange, onOverwrite, isLoading = false }) => { const handleOverwrite = async () => { - try { - await onOverwrite(); - onOpenChange(false); - } catch (error) { - // Error handling is done by the parent component - } + await onOverwrite(); + onOpenChange(false); }; return ( From 1fdb251a47a7168f9431e00c8f714a9e088db09a Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Fri, 3 Oct 2025 16:15:32 -0300 Subject: [PATCH 14/23] UI minor tweaks --- frontend/components/ui/input.tsx | 2 +- frontend/src/app/knowledge/chunks/page.tsx | 626 +++++++++++---------- frontend/src/app/knowledge/page.tsx | 3 +- 3 files changed, 316 insertions(+), 315 deletions(-) diff --git a/frontend/components/ui/input.tsx b/frontend/components/ui/input.tsx index ffcda454..86e638a1 100644 --- a/frontend/components/ui/input.tsx +++ b/frontend/components/ui/input.tsx @@ -44,7 +44,7 @@ const Input = React.forwardRef( placeholder={placeholder} className={cn( "primary-input", - icon && "pl-9", + icon && "!pl-9", type === "password" && "!pr-8", icon ? 
inputClassName : className )} diff --git a/frontend/src/app/knowledge/chunks/page.tsx b/frontend/src/app/knowledge/chunks/page.tsx index cb96eddc..7d49a56e 100644 --- a/frontend/src/app/knowledge/chunks/page.tsx +++ b/frontend/src/app/knowledge/chunks/page.tsx @@ -1,289 +1,291 @@ "use client"; import { ArrowLeft, Check, Copy, Loader2, Search } from "lucide-react"; -import { Suspense, useCallback, useEffect, useMemo, useState } from "react"; import { useRouter, useSearchParams } from "next/navigation"; +import { Suspense, useCallback, useEffect, useMemo, useState } from "react"; import { ProtectedRoute } from "@/components/protected-route"; import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useTask } from "@/contexts/task-context"; import { - type ChunkResult, - type File, - useGetSearchQuery, + type ChunkResult, + type File, + useGetSearchQuery, } from "../../api/queries/useGetSearchQuery"; -import { Label } from "@/components/ui/label"; -import { Checkbox } from "@/components/ui/checkbox"; -import { Input } from "@/components/ui/input"; const getFileTypeLabel = (mimetype: string) => { - if (mimetype === "application/pdf") return "PDF"; - if (mimetype === "text/plain") return "Text"; - if (mimetype === "application/msword") return "Word Document"; - return "Unknown"; + if (mimetype === "application/pdf") return "PDF"; + if (mimetype === "text/plain") return "Text"; + if (mimetype === "application/msword") return "Word Document"; + return "Unknown"; }; function ChunksPageContent() { - const router = useRouter(); - const searchParams = useSearchParams(); - const { isMenuOpen } = useTask(); - const { parsedFilterData, isPanelOpen } = useKnowledgeFilter(); + const router = useRouter(); + const searchParams = useSearchParams(); + const { isMenuOpen } = useTask(); + const { parsedFilterData, isPanelOpen } = useKnowledgeFilter(); - const filename = searchParams.get("filename"); - const [chunks, setChunks] = useState([]); - const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< - ChunkResult[] - >([]); - const [selectedChunks, setSelectedChunks] = useState>(new Set()); - const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState< - number | null - >(null); + const filename = searchParams.get("filename"); + const [chunks, setChunks] = useState([]); + const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< + ChunkResult[] + >([]); + const [selectedChunks, setSelectedChunks] = useState>(new Set()); + const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState< + number | null + >(null); - // Calculate average chunk length - const averageChunkLength = useMemo( - () => - chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / - chunks.length || 0, - [chunks] - ); + // Calculate average chunk length + const averageChunkLength = useMemo( + () => + chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / + chunks.length || 0, + [chunks], + ); - const [selectAll, setSelectAll] = useState(false); - const [queryInputText, setQueryInputText] = useState( - parsedFilterData?.query ?? "" - ); + const [selectAll, setSelectAll] = useState(false); + const [queryInputText, setQueryInputText] = useState( + parsedFilterData?.query ?? 
"", + ); - // Use the same search query as the knowledge page, but we'll filter for the specific file - const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); + // Use the same search query as the knowledge page, but we'll filter for the specific file + const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); - useEffect(() => { - if (queryInputText === "") { - setChunksFilteredByQuery(chunks); - } else { - setChunksFilteredByQuery( - chunks.filter((chunk) => - chunk.text.toLowerCase().includes(queryInputText.toLowerCase()) - ) - ); - } - }, [queryInputText, chunks]); + useEffect(() => { + if (queryInputText === "") { + setChunksFilteredByQuery(chunks); + } else { + setChunksFilteredByQuery( + chunks.filter((chunk) => + chunk.text.toLowerCase().includes(queryInputText.toLowerCase()), + ), + ); + } + }, [queryInputText, chunks]); - const handleCopy = useCallback((text: string, index: number) => { - // Trim whitespace and remove new lines/tabs for cleaner copy - navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, "")); - setActiveCopiedChunkIndex(index); - setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds - }, []); + const handleCopy = useCallback((text: string, index: number) => { + // Trim whitespace and remove new lines/tabs for cleaner copy + navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, "")); + setActiveCopiedChunkIndex(index); + setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds + }, []); - const fileData = (data as File[]).find( - (file: File) => file.filename === filename - ); + const fileData = (data as File[]).find( + (file: File) => file.filename === filename, + ); - // Extract chunks for the specific file - useEffect(() => { - if (!filename || !(data as File[]).length) { - setChunks([]); - return; - } + // Extract chunks for the specific file + useEffect(() => { + if (!filename || !(data as File[]).length) { + setChunks([]); + return; + } - setChunks(fileData?.chunks || []); - }, [data, filename]); + setChunks(fileData?.chunks || []); + }, [data, filename]); - // Set selected state for all checkboxes when selectAll changes - useEffect(() => { - if (selectAll) { - setSelectedChunks(new Set(chunks.map((_, index) => index))); - } else { - setSelectedChunks(new Set()); - } - }, [selectAll, setSelectedChunks, chunks]); + // Set selected state for all checkboxes when selectAll changes + useEffect(() => { + if (selectAll) { + setSelectedChunks(new Set(chunks.map((_, index) => index))); + } else { + setSelectedChunks(new Set()); + } + }, [selectAll, setSelectedChunks, chunks]); - const handleBack = useCallback(() => { - router.push("/knowledge"); - }, [router]); + const handleBack = useCallback(() => { + router.push("/knowledge"); + }, [router]); - const handleChunkCardCheckboxChange = useCallback( - (index: number) => { - setSelectedChunks((prevSelected) => { - const newSelected = new Set(prevSelected); - if (newSelected.has(index)) { - newSelected.delete(index); - } else { - newSelected.add(index); - } - return newSelected; - }); - }, - [setSelectedChunks] - ); + const handleChunkCardCheckboxChange = useCallback( + (index: number) => { + setSelectedChunks((prevSelected) => { + const newSelected = new Set(prevSelected); + if (newSelected.has(index)) { + newSelected.delete(index); + } else { + newSelected.add(index); + } + return newSelected; + }); + }, + [setSelectedChunks], + ); - if (!filename) { - return ( -
-
- -

No file specified

-

- Please select a file from the knowledge page -

-
-
- ); - } + if (!filename) { + return ( +
+
+ +

No file specified

+

+ Please select a file from the knowledge page +

+
+
+ ); + } - return ( -
-
- {/* Header */} -
-
- -

- {/* Removes file extension from filename */} - {filename.replace(/\.[^/.]+$/, "")} -

-
-
-
- : null} - id="search-query" - type="text" - defaultValue={parsedFilterData?.query} - value={queryInputText} - onChange={(e) => setQueryInputText(e.target.value)} - placeholder="Search chunks..." - /> -
-
- - setSelectAll(!!handleSelectAll) - } - /> - -
-
-
+ return ( +
+
+ {/* Header */} +
+
+ +

+ {/* Removes file extension from filename */} + {filename.replace(/\.[^/.]+$/, "")} +

+
+
+
+ : null + } + id="search-query" + type="text" + defaultValue={parsedFilterData?.query} + value={queryInputText} + onChange={(e) => setQueryInputText(e.target.value)} + placeholder="Search chunks..." + /> +
+
+ + setSelectAll(!!handleSelectAll) + } + /> + +
+
+
- {/* Content Area - matches knowledge page structure */} -
- {isFetching ? ( -
-
- -

- Loading chunks... -

-
-
- ) : chunks.length === 0 ? ( -
-
- -

No chunks found

-

- This file may not have been indexed yet -

-
-
- ) : ( -
- {chunksFilteredByQuery.map((chunk, index) => ( -
-
-
-
- - handleChunkCardCheckboxChange(index) - } - /> -
- - Chunk {chunk.page} - - - {chunk.text.length} chars - -
- -
-
+ {/* Content Area - matches knowledge page structure */} +
+ {isFetching ? ( +
+
+ +

+ Loading chunks... +

+
+
+ ) : chunks.length === 0 ? ( +
+
+ +

No chunks found

+

+ This file may not have been indexed yet +

+
+
+ ) : ( +
+ {chunksFilteredByQuery.map((chunk, index) => ( +
+
+
+
+ + handleChunkCardCheckboxChange(index) + } + /> +
+ + Chunk {chunk.page} + + + {chunk.text.length} chars + +
+ +
+
- {/* TODO: Update to use active toggle */} - {/* + {/* TODO: Update to use active toggle */} + {/* Active */} -
-
- {chunk.text} -
-
- ))} -
- )} -
-
- {/* Right panel - Summary (TODO), Technical details, */} -
-
-

Technical details

-
-
-
Total chunks
-
- {chunks.length} -
-
-
-
Avg length
-
- {averageChunkLength.toFixed(0)} chars -
-
- {/* TODO: Uncomment after data is available */} - {/*
+
+
+ {chunk.text} +
+
+ ))} +
+ )} +
+
+ {/* Right panel - Summary (TODO), Technical details, */} +
+
+

Technical details

+
+
+
Total chunks
+
+ {chunks.length} +
+
+
+
Avg length
+
+ {averageChunkLength.toFixed(0)} chars +
+
+ {/* TODO: Uncomment after data is available */} + {/*
Process time
@@ -293,76 +295,76 @@ function ChunksPageContent() {
*/} -
-
-
-

Original document

-
-
-
Name
-
- {fileData?.filename} -
-
-
-
Type
-
- {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"} -
-
-
-
Size
-
- {fileData?.size - ? `${Math.round(fileData.size / 1024)} KB` - : "Unknown"} -
-
-
-
Uploaded
-
- N/A -
-
- {/* TODO: Uncomment after data is available */} - {/*
+
+
+
+

Original document

+
+
+
Name
+
+ {fileData?.filename} +
+
+
+
Type
+
+ {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"} +
+
+
+
Size
+
+ {fileData?.size + ? `${Math.round(fileData.size / 1024)} KB` + : "Unknown"} +
+
+
+
Uploaded
+
+ N/A +
+
+ {/* TODO: Uncomment after data is available */} + {/*
Source
*/} -
-
Updated
-
- N/A -
-
-
-
-
-
- ); +
+
Updated
+
+ N/A +
+
+ +
+
+
+ ); } function ChunksPage() { - return ( - -
- -

Loading...

-
-
- } - > - - - ); + return ( + +
+ +

Loading...

+
+ + } + > + +
+ ); } export default function ProtectedChunksPage() { - return ( - - - - ); + return ( + + + + ); } diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx index 9e933903..5395d85d 100644 --- a/frontend/src/app/knowledge/page.tsx +++ b/frontend/src/app/knowledge/page.tsx @@ -310,14 +310,13 @@ function SearchPage() { )} From 946d3edc89d09af8fabd5e6b1981466d76fae7ee Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Fri, 3 Oct 2025 16:54:40 -0300 Subject: [PATCH 15/23] refresh tasks on entering page, make failed files persist --- frontend/src/app/knowledge/page.tsx | 7 ++++++- frontend/src/contexts/task-context.tsx | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/frontend/src/app/knowledge/page.tsx b/frontend/src/app/knowledge/page.tsx index 5395d85d..73cc2c7d 100644 --- a/frontend/src/app/knowledge/page.tsx +++ b/frontend/src/app/knowledge/page.tsx @@ -7,6 +7,7 @@ import { useRouter } from "next/navigation"; import { type ChangeEvent, useCallback, + useEffect, useRef, useState, } from "react"; @@ -51,7 +52,7 @@ function getSourceIcon(connectorType?: string) { function SearchPage() { const router = useRouter(); - const { isMenuOpen, files: taskFiles } = useTask(); + const { isMenuOpen, files: taskFiles, refreshTasks } = useTask(); const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = useKnowledgeFilter(); const [selectedRows, setSelectedRows] = useState([]); @@ -59,6 +60,10 @@ function SearchPage() { const deleteDocumentMutation = useDeleteDocument(); + useEffect(() => { + refreshTasks(); + }, [refreshTasks]); + const { data: searchData = [], isFetching } = useGetSearchQuery( parsedFilterData?.query || "*", parsedFilterData, diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index d25130f7..69a1214f 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -226,7 +226,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { setTimeout(() => { refetchSearch(); setFiles((prevFiles) => - prevFiles.filter((file) => file.task_id !== currentTask.task_id && file.status !== "failed"), + prevFiles.filter((file) => file.task_id !== currentTask.task_id || file.status === "failed"), ); }, 500); } else if ( @@ -258,6 +258,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { ); const refreshTasks = useCallback(async () => { + setFiles([]); await refetchTasks(); }, [refetchTasks]); From 509b6c613228fa5ec592533d6a5e69b8bfa76aa2 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Fri, 3 Oct 2025 17:06:07 -0300 Subject: [PATCH 16/23] make view button open menu --- frontend/src/contexts/task-context.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index 69a1214f..a8693526 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -220,7 +220,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { description, action: { label: "View", - onClick: () => console.log("View task", currentTask.task_id), + onClick: () => setIsMenuOpen(true), }, }); setTimeout(() => { @@ -252,7 +252,9 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { (_taskId: string) => { // React Query will automatically handle polling when tasks are active // Just trigger a refetch to get the latest data - refetchTasks(); + setTimeout(() => { 
+ refetchTasks(); + }, 500); }, [refetchTasks], ); From 7201a914be856775a0bafc7ef445a8882766d3ae Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Fri, 3 Oct 2025 17:12:36 -0300 Subject: [PATCH 17/23] open tasks menu when clicking view --- frontend/src/components/task-notification-menu.tsx | 11 +++++++++-- frontend/src/contexts/task-context.tsx | 10 +++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/task-notification-menu.tsx b/frontend/src/components/task-notification-menu.tsx index e17f9579..fed7e6f1 100644 --- a/frontend/src/components/task-notification-menu.tsx +++ b/frontend/src/components/task-notification-menu.tsx @@ -1,6 +1,6 @@ "use client" -import { useState } from 'react' +import { useEffect, useState } from 'react' import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react' import { Button } from '@/components/ui/button' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' @@ -8,9 +8,16 @@ import { Badge } from '@/components/ui/badge' import { useTask, Task } from '@/contexts/task-context' export function TaskNotificationMenu() { - const { tasks, isFetching, isMenuOpen, cancelTask } = useTask() + const { tasks, isFetching, isMenuOpen, isRecentTasksExpanded, cancelTask } = useTask() const [isExpanded, setIsExpanded] = useState(false) + // Sync local state with context state + useEffect(() => { + if (isRecentTasksExpanded) { + setIsExpanded(true) + } + }, [isRecentTasksExpanded]) + // Don't render if menu is closed if (!isMenuOpen) return null diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index a8693526..f5d0c0c4 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -43,6 +43,8 @@ interface TaskContextType { isFetching: boolean; isMenuOpen: boolean; toggleMenu: () => void; + isRecentTasksExpanded: boolean; + setRecentTasksExpanded: (expanded: boolean) => void; // React Query states isLoading: boolean; error: Error | null; @@ -53,6 +55,7 @@ const TaskContext = createContext(undefined); export function TaskProvider({ children }: { children: React.ReactNode }) { const [files, setFiles] = useState([]); const [isMenuOpen, setIsMenuOpen] = useState(false); + const [isRecentTasksExpanded, setIsRecentTasksExpanded] = useState(false); const previousTasksRef = useRef([]); const { isAuthenticated, isNoAuthMode } = useAuth(); @@ -220,7 +223,10 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { description, action: { label: "View", - onClick: () => setIsMenuOpen(true), + onClick: () => { + setIsMenuOpen(true); + setIsRecentTasksExpanded(true); + }, }, }); setTimeout(() => { @@ -302,6 +308,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { isFetching, isMenuOpen, toggleMenu, + isRecentTasksExpanded, + setRecentTasksExpanded: setIsRecentTasksExpanded, isLoading, error, }; From 5ea70cc7797b652eb829617d466d18e706a8adae Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Fri, 3 Oct 2025 16:17:05 -0400 Subject: [PATCH 18/23] Refactor file upload task to use filename mapping (#195) Changed the handling of original filenames in Langflow upload tasks to use a mapping from file paths to original filenames instead of a list. Updated both the API router and TaskService to support this change, improving reliability when associating uploaded files with their original names. 
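A minimal sketch of the mapping this refactor introduces (the zip-based map and
the basename fallback are taken from the diff below; the sample paths and names
are illustrative only):

    import os

    temp_file_paths = ["/tmp/report_q3.pdf", "/tmp/meeting_notes.txt"]  # sanitized temp paths
    original_filenames = ["report q3.pdf", "meeting notes.txt"]         # names as uploaded

    # Pair each temp path with the filename the user actually sent, so
    # FileTask records can display the original name.
    file_path_to_original_filename = dict(zip(temp_file_paths, original_filenames))

    # When building a FileTask, fall back to the path's basename if the
    # map has no entry for that path.
    for path in temp_file_paths:
        filename = file_path_to_original_filename.get(path, os.path.basename(path))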
--- src/api/router.py | 83 ++++++++++++++++++++---------------- src/services/task_service.py | 25 ++++++----- 2 files changed, 60 insertions(+), 48 deletions(-) diff --git a/src/api/router.py b/src/api/router.py index 327757be..15a9b116 100644 --- a/src/api/router.py +++ b/src/api/router.py @@ -13,27 +13,27 @@ logger = get_logger(__name__) async def upload_ingest_router( - request: Request, - document_service=None, - langflow_file_service=None, + request: Request, + document_service=None, + langflow_file_service=None, session_manager=None, - task_service=None + task_service=None, ): """ Router endpoint that automatically routes upload requests based on configuration. - + - If DISABLE_INGEST_WITH_LANGFLOW is True: uses traditional OpenRAG upload (/upload) - If DISABLE_INGEST_WITH_LANGFLOW is False (default): uses Langflow upload-ingest via task service - + This provides a single endpoint that users can call regardless of backend configuration. All langflow uploads are processed as background tasks for better scalability. """ try: logger.debug( - "Router upload_ingest endpoint called", - disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW + "Router upload_ingest endpoint called", + disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW, ) - + # Route based on configuration if DISABLE_INGEST_WITH_LANGFLOW: # Route to traditional OpenRAG upload @@ -42,8 +42,10 @@ async def upload_ingest_router( else: # Route to Langflow upload and ingest using task service logger.debug("Routing to Langflow upload-ingest pipeline via task service") - return await langflow_upload_ingest_task(request, langflow_file_service, session_manager, task_service) - + return await langflow_upload_ingest_task( + request, langflow_file_service, session_manager, task_service + ) + except Exception as e: logger.error("Error in upload_ingest_router", error=str(e)) error_msg = str(e) @@ -57,17 +59,14 @@ async def upload_ingest_router( async def langflow_upload_ingest_task( - request: Request, - langflow_file_service, - session_manager, - task_service + request: Request, langflow_file_service, session_manager, task_service ): """Task-based langflow upload and ingest for single/multiple files""" try: logger.debug("Task-based langflow upload_ingest endpoint called") form = await request.form() upload_files = form.getlist("file") - + if not upload_files or len(upload_files) == 0: logger.error("No files provided in task-based upload request") return JSONResponse({"error": "Missing files"}, status_code=400) @@ -82,10 +81,11 @@ async def langflow_upload_ingest_task( # Parse JSON fields if provided settings = None tweaks = None - + if settings_json: try: import json + settings = json.loads(settings_json) except json.JSONDecodeError as e: logger.error("Invalid settings JSON", error=str(e)) @@ -94,6 +94,7 @@ async def langflow_upload_ingest_task( if tweaks_json: try: import json + tweaks = json.loads(tweaks_json) except json.JSONDecodeError as e: logger.error("Invalid tweaks JSON", error=str(e)) @@ -107,11 +108,14 @@ async def langflow_upload_ingest_task( jwt_token = getattr(request.state, "jwt_token", None) if not user_id: - return JSONResponse({"error": "User authentication required"}, status_code=401) + return JSONResponse( + {"error": "User authentication required"}, status_code=401 + ) # Create temporary files for task processing import tempfile import os + temp_file_paths = [] original_filenames = [] @@ -132,7 +136,7 @@ async def langflow_upload_ingest_task( temp_path = os.path.join(temp_dir, safe_filename) # Write content to temp 
file - with open(temp_path, 'wb') as temp_file: + with open(temp_path, "wb") as temp_file: temp_file.write(content) temp_file_paths.append(temp_path) @@ -143,22 +147,22 @@ async def langflow_upload_ingest_task( user_id=user_id, has_settings=bool(settings), has_tweaks=bool(tweaks), - delete_after_ingest=delete_after_ingest + delete_after_ingest=delete_after_ingest, ) # Create langflow upload task - print(f"tweaks: {tweaks}") - print(f"settings: {settings}") - print(f"jwt_token: {jwt_token}") - print(f"user_name: {user_name}") - print(f"user_email: {user_email}") - print(f"session_id: {session_id}") - print(f"delete_after_ingest: {delete_after_ingest}") - print(f"temp_file_paths: {temp_file_paths}") + logger.debug( + f"Preparing to create langflow upload task: tweaks={tweaks}, settings={settings}, jwt_token={jwt_token}, user_name={user_name}, user_email={user_email}, session_id={session_id}, delete_after_ingest={delete_after_ingest}, temp_file_paths={temp_file_paths}", + ) + # Create a map between temp_file_paths and original_filenames + file_path_to_original_filename = dict(zip(temp_file_paths, original_filenames)) + logger.debug( + f"File path to original filename map: {file_path_to_original_filename}", + ) task_id = await task_service.create_langflow_upload_task( user_id=user_id, file_paths=temp_file_paths, - original_filenames=original_filenames, + original_filenames=file_path_to_original_filename, langflow_file_service=langflow_file_service, session_manager=session_manager, jwt_token=jwt_token, @@ -172,20 +176,24 @@ async def langflow_upload_ingest_task( ) logger.debug("Langflow upload task created successfully", task_id=task_id) - - return JSONResponse({ - "task_id": task_id, - "message": f"Langflow upload task created for {len(upload_files)} file(s)", - "file_count": len(upload_files) - }, status_code=202) # 202 Accepted for async processing - + + return JSONResponse( + { + "task_id": task_id, + "message": f"Langflow upload task created for {len(upload_files)} file(s)", + "file_count": len(upload_files), + }, + status_code=202, + ) # 202 Accepted for async processing + except Exception: # Clean up temp files on error from utils.file_utils import safe_unlink + for temp_path in temp_file_paths: safe_unlink(temp_path) raise - + except Exception as e: logger.error( "Task-based langflow upload_ingest endpoint failed", @@ -193,5 +201,6 @@ async def langflow_upload_ingest_task( error=str(e), ) import traceback + logger.error("Full traceback", traceback=traceback.format_exc()) return JSONResponse({"error": str(e)}, status_code=500) diff --git a/src/services/task_service.py b/src/services/task_service.py index eb5825c0..735ad483 100644 --- a/src/services/task_service.py +++ b/src/services/task_service.py @@ -1,6 +1,5 @@ import asyncio import random -from typing import Dict, Optional import time import uuid @@ -59,7 +58,7 @@ class TaskService: file_paths: list, langflow_file_service, session_manager, - original_filenames: list = None, + original_filenames: dict | None = None, jwt_token: str = None, owner_name: str = None, owner_email: str = None, @@ -88,7 +87,7 @@ class TaskService: ) return await self.create_custom_task(user_id, file_paths, processor, original_filenames) - async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: list = None) -> str: + async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: dict | None = None) -> str: """Create a new task with custom processor for any type of items""" import os # Store 
anonymous tasks under a stable key so they can be retrieved later @@ -96,14 +95,18 @@ class TaskService: task_id = str(uuid.uuid4()) # Create file tasks with original filenames if provided - file_tasks = {} - for i, item in enumerate(items): - if original_filenames and i < len(original_filenames): - filename = original_filenames[i] - else: - filename = os.path.basename(str(item)) - - file_tasks[str(item)] = FileTask(file_path=str(item), filename=filename) + normalized_originals = ( + {str(k): v for k, v in original_filenames.items()} if original_filenames else {} + ) + file_tasks = { + str(item): FileTask( + file_path=str(item), + filename=normalized_originals.get( + str(item), os.path.basename(str(item)) + ), + ) + for item in items + } upload_task = UploadTask( task_id=task_id, From eef9e380dc3f76956155fc3cdad80d48a2a80cbc Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Mon, 6 Oct 2025 09:45:21 -0300 Subject: [PATCH 19/23] Added opensearch queries --- src/utils/opensearch_queries.py | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 src/utils/opensearch_queries.py diff --git a/src/utils/opensearch_queries.py b/src/utils/opensearch_queries.py new file mode 100644 index 00000000..f29c6283 --- /dev/null +++ b/src/utils/opensearch_queries.py @@ -0,0 +1,55 @@ +""" +Utility functions for constructing OpenSearch queries consistently. +""" +from typing import Union, List + + +def build_filename_query(filename: str) -> dict: + """ + Build a standardized query for finding documents by filename. + + Args: + filename: The exact filename to search for + + Returns: + A dict containing the OpenSearch query body + """ + return { + "term": { + "filename": filename + } + } + + +def build_filename_search_body(filename: str, size: int = 1, source: Union[bool, List[str]] = False) -> dict: + """ + Build a complete search body for checking if a filename exists. + + Args: + filename: The exact filename to search for + size: Number of results to return (default: 1) + source: Whether to include source fields, or list of specific fields to include (default: False) + + Returns: + A dict containing the complete OpenSearch search body + """ + return { + "query": build_filename_query(filename), + "size": size, + "_source": source + } + + +def build_filename_delete_body(filename: str) -> dict: + """ + Build a delete-by-query body for removing all documents with a filename. 
+ + Args: + filename: The exact filename to delete + + Returns: + A dict containing the OpenSearch delete-by-query body + """ + return { + "query": build_filename_query(filename) + } From 95bdc59af0bad389fca636b92f30621cbe2961b3 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Mon, 6 Oct 2025 09:45:33 -0300 Subject: [PATCH 20/23] Use opensearch queries on documents and processors --- src/api/documents.py | 32 ++++++-------------------------- src/models/processors.py | 20 ++++---------------- 2 files changed, 10 insertions(+), 42 deletions(-) diff --git a/src/api/documents.py b/src/api/documents.py index a367c9fe..048f746a 100644 --- a/src/api/documents.py +++ b/src/api/documents.py @@ -23,20 +23,9 @@ async def check_filename_exists(request: Request, document_service, session_mana ) # Search for any document with this exact filename - # Try both .keyword (exact match) and regular field (analyzed match) - search_body = { - "query": { - "bool": { - "should": [ - {"term": {"filename.keyword": filename}}, - {"term": {"filename": filename}} - ], - "minimum_should_match": 1 - } - }, - "size": 1, - "_source": ["filename"] - } + from utils.opensearch_queries import build_filename_search_body + + search_body = build_filename_search_body(filename, size=1, source=["filename"]) logger.debug(f"Checking filename existence: {filename}") @@ -83,18 +72,9 @@ async def delete_documents_by_filename(request: Request, document_service, sessi ) # Delete by query to remove all chunks of this document - # Use both .keyword and regular field to ensure we catch all variations - delete_query = { - "query": { - "bool": { - "should": [ - {"term": {"filename.keyword": filename}}, - {"term": {"filename": filename}} - ], - "minimum_should_match": 1 - } - } - } + from utils.opensearch_queries import build_filename_delete_body + + delete_query = build_filename_delete_body(filename) logger.debug(f"Deleting documents with filename: {filename}") diff --git a/src/models/processors.py b/src/models/processors.py index 6d7b74b4..4a5d96b5 100644 --- a/src/models/processors.py +++ b/src/models/processors.py @@ -65,6 +65,7 @@ class TaskProcessor: Returns True if any chunks with this filename exist. """ from config.settings import INDEX_NAME + from utils.opensearch_queries import build_filename_search_body import asyncio max_retries = 3 @@ -73,15 +74,7 @@ class TaskProcessor: for attempt in range(max_retries): try: # Search for any document with this exact filename - search_body = { - "query": { - "term": { - "filename.keyword": filename - } - }, - "size": 1, - "_source": False - } + search_body = build_filename_search_body(filename, size=1, source=False) response = await opensearch_client.search( index=INDEX_NAME, @@ -126,16 +119,11 @@ class TaskProcessor: Delete all chunks of a document with the given filename from OpenSearch. 
""" from config.settings import INDEX_NAME + from utils.opensearch_queries import build_filename_delete_body try: # Delete all documents with this filename - delete_body = { - "query": { - "term": { - "filename.keyword": filename - } - } - } + delete_body = build_filename_delete_body(filename) response = await opensearch_client.delete_by_query( index=INDEX_NAME, From 1e3f28f9ed1295f0fd737131f7e3b5f4e0fbd61e Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Mon, 6 Oct 2025 11:51:13 -0300 Subject: [PATCH 21/23] removed console logs --- frontend/src/contexts/task-context.tsx | 39 ++++++++++++++++++-------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index f5d0c0c4..b3d1f647 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -119,7 +119,6 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { previousTasksRef.current = tasks; return; } - console.log(tasks, previousTasksRef.current); // Check for task status changes by comparing with previous tasks tasks.forEach((currentTask) => { @@ -128,18 +127,22 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { ); // Only show toasts if we have previous data and status has changed - if (((previousTask && previousTask.status !== currentTask.status) || (!previousTask && previousTasksRef.current.length !== 0))) { - console.log("task status changed", currentTask.status); + if ( + (previousTask && previousTask.status !== currentTask.status) || + (!previousTask && previousTasksRef.current.length !== 0) + ) { // Process files from failed task and add them to files list if (currentTask.files && typeof currentTask.files === "object") { - console.log("processing files", currentTask.files); const taskFileEntries = Object.entries(currentTask.files); const now = new Date().toISOString(); taskFileEntries.forEach(([filePath, fileInfo]) => { if (typeof fileInfo === "object" && fileInfo) { // Use the filename from backend if available, otherwise extract from path - const fileName = (fileInfo as any).filename || filePath.split("/").pop() || filePath; + const fileName = + (fileInfo as any).filename || + filePath.split("/").pop() || + filePath; const fileStatus = fileInfo.status as string; // Map backend file status to our TaskFile status @@ -205,7 +208,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { }); } if ( - previousTask && previousTask.status !== "completed" && + previousTask && + previousTask.status !== "completed" && currentTask.status === "completed" ) { // Task just completed - show success toast with file counts @@ -214,9 +218,15 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { let description = ""; if (failedFiles > 0) { - description = `${successfulFiles} file${successfulFiles !== 1 ? 's' : ''} uploaded successfully, ${failedFiles} file${failedFiles !== 1 ? 's' : ''} failed`; + description = `${successfulFiles} file${ + successfulFiles !== 1 ? "s" : "" + } uploaded successfully, ${failedFiles} file${ + failedFiles !== 1 ? "s" : "" + } failed`; } else { - description = `${successfulFiles} file${successfulFiles !== 1 ? 's' : ''} uploaded successfully`; + description = `${successfulFiles} file${ + successfulFiles !== 1 ? 
"s" : "" + } uploaded successfully`; } toast.success("Task completed", { @@ -230,13 +240,18 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { }, }); setTimeout(() => { - refetchSearch(); - setFiles((prevFiles) => - prevFiles.filter((file) => file.task_id !== currentTask.task_id || file.status === "failed"), + setFiles((prevFiles) => + prevFiles.filter( + (file) => + file.task_id !== currentTask.task_id || + file.status === "failed", + ), ); + refetchSearch(); }, 500); } else if ( - previousTask && previousTask.status !== "failed" && + previousTask && + previousTask.status !== "failed" && previousTask.status !== "error" && (currentTask.status === "failed" || currentTask.status === "error") ) { From 640dbcee20b46d9d1eeaa7dc9227ecfc4e2b16e3 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Mon, 6 Oct 2025 11:52:18 -0300 Subject: [PATCH 22/23] remove unused function --- frontend/src/contexts/task-context.tsx | 6 ------ 1 file changed, 6 deletions(-) diff --git a/frontend/src/contexts/task-context.tsx b/frontend/src/contexts/task-context.tsx index b3d1f647..12ad3c24 100644 --- a/frontend/src/contexts/task-context.tsx +++ b/frontend/src/contexts/task-context.tsx @@ -36,7 +36,6 @@ interface TaskContextType { files: TaskFile[]; addTask: (taskId: string) => void; addFiles: (files: Partial[], taskId: string) => void; - removeTask: (taskId: string) => void; refreshTasks: () => Promise; cancelTask: (taskId: string) => Promise; isPolling: boolean; @@ -285,10 +284,6 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { await refetchTasks(); }, [refetchTasks]); - const removeTask = useCallback((_taskId: string) => { - // This is now handled by React Query automatically - // Tasks will be removed from the list when they're no longer returned by the API - }, []); const cancelTask = useCallback( async (taskId: string) => { @@ -316,7 +311,6 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { files, addTask, addFiles, - removeTask, refreshTasks, cancelTask, isPolling, From 641a24aa0664cf3a1f33f98da2ecabd85127f2ed Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Mon, 6 Oct 2025 11:57:51 -0300 Subject: [PATCH 23/23] removed unused file --- .../app/api/queries/useGetTaskStatusQuery.ts | 80 ------------------- 1 file changed, 80 deletions(-) delete mode 100644 frontend/src/app/api/queries/useGetTaskStatusQuery.ts diff --git a/frontend/src/app/api/queries/useGetTaskStatusQuery.ts b/frontend/src/app/api/queries/useGetTaskStatusQuery.ts deleted file mode 100644 index 17cd2d16..00000000 --- a/frontend/src/app/api/queries/useGetTaskStatusQuery.ts +++ /dev/null @@ -1,80 +0,0 @@ -import { - type UseQueryOptions, - useQuery, - useQueryClient, -} from "@tanstack/react-query"; - -export interface TaskStatus { - task_id: string; - status: - | "pending" - | "running" - | "processing" - | "completed" - | "failed" - | "error"; - total_files?: number; - processed_files?: number; - successful_files?: number; - failed_files?: number; - running_files?: number; - pending_files?: number; - created_at: string; - updated_at: string; - duration_seconds?: number; - result?: Record; - error?: string; - files?: Record>; -} - -export const useGetTaskStatusQuery = ( - taskId: string, - options?: Omit, "queryKey" | "queryFn"> -) => { - const queryClient = useQueryClient(); - - async function getTaskStatus(): Promise { - if (!taskId) { - return null; - } - - const response = await fetch(`/api/tasks/${taskId}`); - - if (!response.ok) { - if (response.status 
=== 404) { - return null; // Task not found - } - throw new Error("Failed to fetch task status"); - } - - return response.json(); - } - - const queryResult = useQuery( - { - queryKey: ["task-status", taskId], - queryFn: getTaskStatus, - refetchInterval: (data) => { - // Only poll if the task is still active - if (!data) { - return false; // Stop polling if no data - } - - const isActive = - data.status === "pending" || - data.status === "running" || - data.status === "processing"; - - return isActive ? 3000 : false; // Poll every 3 seconds if active - }, - refetchIntervalInBackground: true, - staleTime: 0, // Always consider data stale to ensure fresh updates - gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes - enabled: !!taskId, // Only run if taskId is provided - ...options, - }, - queryClient, - ); - - return queryResult; -};
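A closing note on the query helpers introduced in patches 19 and 20: because every call site now builds its request body through utils.opensearch_queries, an existence check and a delete-by-query stay structurally identical. A minimal usage sketch follows; the helper names and signatures come from src/utils/opensearch_queries.py, while the client construction, index name, and exact response shape are assumptions for illustration:

    from opensearchpy import AsyncOpenSearch  # assumed client library

    from utils.opensearch_queries import (
        build_filename_delete_body,
        build_filename_search_body,
    )

    INDEX_NAME = "documents"  # stand-in for config.settings.INDEX_NAME

    async def filename_exists(client: AsyncOpenSearch, filename: str) -> bool:
        # size=1 and source=False keep the existence check as cheap as possible.
        body = build_filename_search_body(filename, size=1, source=False)
        response = await client.search(index=INDEX_NAME, body=body)
        return response["hits"]["total"]["value"] > 0

    async def delete_document_chunks(client: AsyncOpenSearch, filename: str) -> int:
        # Removes every chunk indexed under this exact filename.
        body = build_filename_delete_body(filename)
        response = await client.delete_by_query(index=INDEX_NAME, body=body)
        return response.get("deleted", 0)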