Merge branch 'main' into feat/knowledge-page-sweep

This commit is contained in:
Cole Goldsmith 2025-10-06 13:22:50 -05:00
commit e4ae80860a
19 changed files with 1990 additions and 1369 deletions

View file

@ -0,0 +1,66 @@
"use client";
import { RotateCcw } from "lucide-react";
import type React from "react";
import { Button } from "./ui/button";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "./ui/dialog";
interface DuplicateHandlingDialogProps {
open: boolean;
onOpenChange: (open: boolean) => void;
onOverwrite: () => void | Promise<void>;
isLoading?: boolean;
}
export const DuplicateHandlingDialog: React.FC<
DuplicateHandlingDialogProps
> = ({ open, onOpenChange, onOverwrite, isLoading = false }) => {
const handleOverwrite = async () => {
await onOverwrite();
onOpenChange(false);
};
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-[450px]">
<DialogHeader>
<DialogTitle>Overwrite document</DialogTitle>
<DialogDescription className="pt-2 text-muted-foreground">
Overwriting will replace the existing document with another version.
This can't be undone.
</DialogDescription>
</DialogHeader>
<DialogFooter className="flex-row gap-2 justify-end">
<Button
type="button"
variant="ghost"
onClick={() => onOpenChange(false)}
disabled={isLoading}
size="sm"
>
Cancel
</Button>
<Button
type="button"
variant="default"
size="sm"
onClick={handleOverwrite}
disabled={isLoading}
className="flex items-center gap-2 !bg-accent-amber-foreground hover:!bg-foreground text-primary-foreground"
>
<RotateCcw className="h-3.5 w-3.5" />
Overwrite
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
);
};

View file

@ -1,16 +1,20 @@
"use client"; "use client";
import { useQueryClient } from "@tanstack/react-query";
import { import {
ChevronDown, ChevronDown,
Cloud, Cloud,
File,
FolderOpen, FolderOpen,
Loader2, Loader2,
PlugZap, PlugZap,
Plus,
Upload,
} from "lucide-react"; } from "lucide-react";
import { useRouter } from "next/navigation"; import { useRouter } from "next/navigation";
import { useEffect, useRef, useState } from "react"; import { useEffect, useRef, useState } from "react";
import { toast } from "sonner"; import { toast } from "sonner";
import { useGetTasksQuery } from "@/app/api/queries/useGetTasksQuery";
import { DuplicateHandlingDialog } from "@/components/duplicate-handling-dialog";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { import {
Dialog, Dialog,
@ -23,16 +27,25 @@ import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label"; import { Label } from "@/components/ui/label";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";
import GoogleDriveIcon from "@/app/settings/icons/google-drive-icon"; import type { File as SearchFile } from "@/src/app/api/queries/useGetSearchQuery";
import SharePointIcon from "@/app/settings/icons/share-point-icon";
import OneDriveIcon from "@/app/settings/icons/one-drive-icon";
export function KnowledgeDropdown() { interface KnowledgeDropdownProps {
active?: boolean;
variant?: "navigation" | "button";
}
export function KnowledgeDropdown({
active,
variant = "navigation",
}: KnowledgeDropdownProps) {
const { addTask } = useTask(); const { addTask } = useTask();
const { refetch: refetchTasks } = useGetTasksQuery();
const queryClient = useQueryClient();
const router = useRouter(); const router = useRouter();
const [isOpen, setIsOpen] = useState(false); const [isOpen, setIsOpen] = useState(false);
const [showFolderDialog, setShowFolderDialog] = useState(false); const [showFolderDialog, setShowFolderDialog] = useState(false);
const [showS3Dialog, setShowS3Dialog] = useState(false); const [showS3Dialog, setShowS3Dialog] = useState(false);
const [showDuplicateDialog, setShowDuplicateDialog] = useState(false);
const [awsEnabled, setAwsEnabled] = useState(false); const [awsEnabled, setAwsEnabled] = useState(false);
const [folderPath, setFolderPath] = useState("/app/documents/"); const [folderPath, setFolderPath] = useState("/app/documents/");
const [bucketUrl, setBucketUrl] = useState("s3://"); const [bucketUrl, setBucketUrl] = useState("s3://");
@ -40,6 +53,8 @@ export function KnowledgeDropdown() {
const [s3Loading, setS3Loading] = useState(false); const [s3Loading, setS3Loading] = useState(false);
const [fileUploading, setFileUploading] = useState(false); const [fileUploading, setFileUploading] = useState(false);
const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false); const [isNavigatingToCloud, setIsNavigatingToCloud] = useState(false);
const [pendingFile, setPendingFile] = useState<File | null>(null);
const [duplicateFilename, setDuplicateFilename] = useState<string>("");
const [cloudConnectors, setCloudConnectors] = useState<{ const [cloudConnectors, setCloudConnectors] = useState<{
[key: string]: { [key: string]: {
name: string; name: string;
@ -51,15 +66,6 @@ export function KnowledgeDropdown() {
const fileInputRef = useRef<HTMLInputElement>(null); const fileInputRef = useRef<HTMLInputElement>(null);
const dropdownRef = useRef<HTMLDivElement>(null); const dropdownRef = useRef<HTMLDivElement>(null);
const connectorIconMap: Record<
string,
React.ComponentType<{ className?: string }>
> = {
google_drive: GoogleDriveIcon,
sharepoint: SharePointIcon,
onedrive: OneDriveIcon,
};
// Check AWS availability and cloud connectors on mount // Check AWS availability and cloud connectors on mount
useEffect(() => { useEffect(() => {
const checkAvailability = async () => { const checkAvailability = async () => {
@ -106,7 +112,7 @@ export function KnowledgeDropdown() {
const connections = statusData.connections || []; const connections = statusData.connections || [];
const activeConnection = connections.find( const activeConnection = connections.find(
(conn: { is_active: boolean; connection_id: string }) => (conn: { is_active: boolean; connection_id: string }) =>
conn.is_active conn.is_active,
); );
const isConnected = activeConnection !== undefined; const isConnected = activeConnection !== undefined;
@ -116,7 +122,7 @@ export function KnowledgeDropdown() {
// Check token availability // Check token availability
try { try {
const tokenRes = await fetch( const tokenRes = await fetch(
`/api/connectors/${type}/token?connection_id=${activeConnection.connection_id}` `/api/connectors/${type}/token?connection_id=${activeConnection.connection_id}`,
); );
if (tokenRes.ok) { if (tokenRes.ok) {
const tokenData = await tokenRes.json(); const tokenData = await tokenRes.json();
@ -169,20 +175,75 @@ export function KnowledgeDropdown() {
const handleFileChange = async (e: React.ChangeEvent<HTMLInputElement>) => { const handleFileChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
const files = e.target.files; const files = e.target.files;
if (files && files.length > 0) { if (files && files.length > 0) {
// Close dropdown and disable button immediately after file selection const file = files[0];
// Close dropdown immediately after file selection
setIsOpen(false); setIsOpen(false);
try {
// Check if filename already exists (using ORIGINAL filename)
console.log("[Duplicate Check] Checking file:", file.name);
const checkResponse = await fetch(
`/api/documents/check-filename?filename=${encodeURIComponent(file.name)}`,
);
console.log("[Duplicate Check] Response status:", checkResponse.status);
if (!checkResponse.ok) {
const errorText = await checkResponse.text();
console.error("[Duplicate Check] Error response:", errorText);
throw new Error(
`Failed to check duplicates: ${checkResponse.statusText}`,
);
}
const checkData = await checkResponse.json();
console.log("[Duplicate Check] Result:", checkData);
if (checkData.exists) {
// Show duplicate handling dialog
console.log("[Duplicate Check] Duplicate detected, showing dialog");
setPendingFile(file);
setDuplicateFilename(file.name);
setShowDuplicateDialog(true);
// Reset file input
if (fileInputRef.current) {
fileInputRef.current.value = "";
}
return;
}
// No duplicate, proceed with upload
console.log("[Duplicate Check] No duplicate, proceeding with upload");
await uploadFile(file, false);
} catch (error) {
console.error("[Duplicate Check] Exception:", error);
toast.error("Failed to check for duplicates", {
description: error instanceof Error ? error.message : "Unknown error",
});
}
}
// Reset file input
if (fileInputRef.current) {
fileInputRef.current.value = "";
}
};
const uploadFile = async (file: File, replace: boolean) => {
setFileUploading(true); setFileUploading(true);
// Trigger the same file upload event as the chat page // Trigger the same file upload event as the chat page
window.dispatchEvent( window.dispatchEvent(
new CustomEvent("fileUploadStart", { new CustomEvent("fileUploadStart", {
detail: { filename: files[0].name }, detail: { filename: file.name },
}) }),
); );
try { try {
const formData = new FormData(); const formData = new FormData();
formData.append("file", files[0]); formData.append("file", file);
formData.append("replace_duplicates", replace.toString());
// Use router upload and ingest endpoint (automatically routes based on configuration) // Use router upload and ingest endpoint (automatically routes based on configuration)
const uploadIngestRes = await fetch("/api/router/upload_ingest", { const uploadIngestRes = await fetch("/api/router/upload_ingest", {
@ -193,24 +254,22 @@ export function KnowledgeDropdown() {
const uploadIngestJson = await uploadIngestRes.json(); const uploadIngestJson = await uploadIngestRes.json();
if (!uploadIngestRes.ok) { if (!uploadIngestRes.ok) {
throw new Error( throw new Error(uploadIngestJson?.error || "Upload and ingest failed");
uploadIngestJson?.error || "Upload and ingest failed"
);
} }
// Extract results from the response - handle both unified and simple formats // Extract results from the response - handle both unified and simple formats
const fileId = uploadIngestJson?.upload?.id || uploadIngestJson?.id; const fileId =
uploadIngestJson?.upload?.id ||
uploadIngestJson?.id ||
uploadIngestJson?.task_id;
const filePath = const filePath =
uploadIngestJson?.upload?.path || uploadIngestJson?.upload?.path || uploadIngestJson?.path || "uploaded";
uploadIngestJson?.path ||
"uploaded";
const runJson = uploadIngestJson?.ingestion; const runJson = uploadIngestJson?.ingestion;
const deleteResult = uploadIngestJson?.deletion; const deleteResult = uploadIngestJson?.deletion;
console.log("c", uploadIngestJson);
if (!fileId) { if (!fileId) {
throw new Error("Upload successful but no file id returned"); throw new Error("Upload successful but no file id returned");
} }
// Check if ingestion actually succeeded // Check if ingestion actually succeeded
if ( if (
runJson && runJson &&
@ -219,30 +278,28 @@ export function KnowledgeDropdown() {
) { ) {
const errorMsg = runJson.error || "Ingestion pipeline failed"; const errorMsg = runJson.error || "Ingestion pipeline failed";
throw new Error( throw new Error(
`Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.` `Ingestion failed: ${errorMsg}. Try setting DISABLE_INGEST_WITH_LANGFLOW=true if you're experiencing Langflow component issues.`,
); );
} }
// Log deletion status if provided // Log deletion status if provided
if (deleteResult) { if (deleteResult) {
if (deleteResult.status === "deleted") { if (deleteResult.status === "deleted") {
console.log( console.log(
"File successfully cleaned up from Langflow:", "File successfully cleaned up from Langflow:",
deleteResult.file_id deleteResult.file_id,
); );
} else if (deleteResult.status === "delete_failed") { } else if (deleteResult.status === "delete_failed") {
console.warn( console.warn(
"Failed to cleanup file from Langflow:", "Failed to cleanup file from Langflow:",
deleteResult.error deleteResult.error,
); );
} }
} }
// Notify UI // Notify UI
window.dispatchEvent( window.dispatchEvent(
new CustomEvent("fileUploaded", { new CustomEvent("fileUploaded", {
detail: { detail: {
file: files[0], file: file,
result: { result: {
file_id: fileId, file_id: fileId,
file_path: filePath, file_path: filePath,
@ -251,30 +308,39 @@ export function KnowledgeDropdown() {
unified: true, unified: true,
}, },
}, },
}) }),
); );
// Trigger search refresh after successful ingestion refetchTasks();
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
} catch (error) { } catch (error) {
window.dispatchEvent( window.dispatchEvent(
new CustomEvent("fileUploadError", { new CustomEvent("fileUploadError", {
detail: { detail: {
filename: files[0].name, filename: file.name,
error: error instanceof Error ? error.message : "Upload failed", error: error instanceof Error ? error.message : "Upload failed",
}, },
}) }),
); );
} finally { } finally {
window.dispatchEvent(new CustomEvent("fileUploadComplete")); window.dispatchEvent(new CustomEvent("fileUploadComplete"));
setFileUploading(false); setFileUploading(false);
// Don't call refetchSearch() here - the knowledgeUpdated event will handle it
}
} }
};
// Reset file input const handleOverwriteFile = async () => {
if (fileInputRef.current) { if (pendingFile) {
fileInputRef.current.value = ""; // Remove the old file from all search query caches before overwriting
queryClient.setQueriesData({ queryKey: ["search"] }, (oldData: []) => {
if (!oldData) return oldData;
// Filter out the file that's being overwritten
return oldData.filter(
(file: SearchFile) => file.filename !== pendingFile.name,
);
});
await uploadFile(pendingFile, true);
setPendingFile(null);
setDuplicateFilename("");
} }
}; };
@ -304,17 +370,12 @@ export function KnowledgeDropdown() {
addTask(taskId); addTask(taskId);
setFolderPath(""); setFolderPath("");
// Trigger search refresh after successful folder processing starts // Refetch tasks to show the new task
console.log( refetchTasks();
"Folder upload successful, dispatching knowledgeUpdated event"
);
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
} else if (response.ok) { } else if (response.ok) {
setFolderPath(""); setFolderPath("");
console.log( // Refetch tasks even for direct uploads in case tasks were created
"Folder upload successful (direct), dispatching knowledgeUpdated event" refetchTasks();
);
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
} else { } else {
console.error("Folder upload failed:", result.error); console.error("Folder upload failed:", result.error);
if (response.status === 400) { if (response.status === 400) {
@ -327,7 +388,6 @@ export function KnowledgeDropdown() {
console.error("Folder upload error:", error); console.error("Folder upload error:", error);
} finally { } finally {
setFolderLoading(false); setFolderLoading(false);
// Don't call refetchSearch() here - the knowledgeUpdated event will handle it
} }
}; };
@ -357,9 +417,8 @@ export function KnowledgeDropdown() {
addTask(taskId); addTask(taskId);
setBucketUrl("s3://"); setBucketUrl("s3://");
// Trigger search refresh after successful S3 processing starts // Refetch tasks to show the new task
console.log("S3 upload successful, dispatching knowledgeUpdated event"); refetchTasks();
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
} else { } else {
console.error("S3 upload failed:", result.error); console.error("S3 upload failed:", result.error);
if (response.status === 400) { if (response.status === 400) {
@ -372,7 +431,6 @@ export function KnowledgeDropdown() {
console.error("S3 upload error:", error); console.error("S3 upload error:", error);
} finally { } finally {
setS3Loading(false); setS3Loading(false);
// Don't call refetchSearch() here - the knowledgeUpdated event will handle it
} }
}; };
@ -380,7 +438,7 @@ export function KnowledgeDropdown() {
.filter(([, info]) => info.available) .filter(([, info]) => info.available)
.map(([type, info]) => ({ .map(([type, info]) => ({
label: info.name, label: info.name,
icon: connectorIconMap[type] || PlugZap, icon: PlugZap,
onClick: async () => { onClick: async () => {
setIsOpen(false); setIsOpen(false);
if (info.connected && info.hasToken) { if (info.connected && info.hasToken) {
@ -407,7 +465,7 @@ export function KnowledgeDropdown() {
const menuItems = [ const menuItems = [
{ {
label: "Add File", label: "Add File",
icon: File, icon: Upload,
onClick: handleFileUpload, onClick: handleFileUpload,
}, },
{ {
@ -440,12 +498,28 @@ export function KnowledgeDropdown() {
return ( return (
<> <>
<div ref={dropdownRef} className="relative"> <div ref={dropdownRef} className="relative">
<Button <button
type="button"
onClick={() => !isLoading && setIsOpen(!isOpen)} onClick={() => !isLoading && setIsOpen(!isOpen)}
disabled={isLoading} disabled={isLoading}
className={cn(
variant === "button"
? "rounded-lg h-12 px-4 flex items-center gap-2 bg-primary text-primary-foreground hover:bg-primary/90 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
: "text-sm group flex p-3 w-full justify-start font-medium cursor-pointer hover:bg-accent hover:text-accent-foreground rounded-lg transition-all disabled:opacity-50 disabled:cursor-not-allowed",
variant === "navigation" && active
? "bg-accent text-accent-foreground shadow-sm"
: variant === "navigation"
? "text-foreground hover:text-accent-foreground"
: "",
)}
> >
{variant === "button" ? (
<> <>
{isLoading && <Loader2 className="h-4 w-4 animate-spin" />} {isLoading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<Plus className="h-4 w-4" />
)}
<span> <span>
{isLoading {isLoading
? fileUploading ? fileUploading
@ -463,32 +537,57 @@ export function KnowledgeDropdown() {
<ChevronDown <ChevronDown
className={cn( className={cn(
"h-4 w-4 transition-transform", "h-4 w-4 transition-transform",
isOpen && "rotate-180" isOpen && "rotate-180",
)} )}
/> />
)} )}
</> </>
</Button> ) : (
<>
<div className="flex items-center flex-1">
{isLoading ? (
<Loader2 className="h-4 w-4 mr-3 shrink-0 animate-spin" />
) : (
<Upload
className={cn(
"h-4 w-4 mr-3 shrink-0",
active
? "text-accent-foreground"
: "text-muted-foreground group-hover:text-foreground",
)}
/>
)}
Knowledge
</div>
{!isLoading && (
<ChevronDown
className={cn(
"h-4 w-4 transition-transform",
isOpen && "rotate-180",
)}
/>
)}
</>
)}
</button>
{isOpen && !isLoading && ( {isOpen && !isLoading && (
<div className="absolute top-full left-0 right-0 mt-1 bg-popover border border-border rounded-lg shadow-md z-50"> <div className="absolute top-full left-0 right-0 mt-1 bg-popover border border-border rounded-md shadow-md z-50">
<div className="p-1"> <div className="py-1">
{menuItems.map((item, index) => ( {menuItems.map((item, index) => (
<button <button
key={index} key={`${item.label}-${index}`}
type="button"
onClick={item.onClick} onClick={item.onClick}
disabled={"disabled" in item ? item.disabled : false} disabled={"disabled" in item ? item.disabled : false}
title={"tooltip" in item ? item.tooltip : undefined} title={"tooltip" in item ? item.tooltip : undefined}
className={cn( className={cn(
"w-full flex flex-nowrap items-center px-3 py-2 text-left text-sm hover:bg-accent hover:text-accent-foreground rounded-sm", "w-full px-3 py-2 text-left text-sm hover:bg-accent hover:text-accent-foreground",
"disabled" in item && "disabled" in item &&
item.disabled && item.disabled &&
"opacity-50 cursor-not-allowed hover:bg-transparent hover:text-current" "opacity-50 cursor-not-allowed hover:bg-transparent hover:text-current",
)} )}
> >
{item.icon && (
<item.icon className="h-4 w-4 mr-2 text-muted-foreground" />
)}
{item.label} {item.label}
</button> </button>
))} ))}
@ -584,6 +683,14 @@ export function KnowledgeDropdown() {
</div> </div>
</DialogContent> </DialogContent>
</Dialog> </Dialog>
{/* Duplicate Handling Dialog */}
<DuplicateHandlingDialog
open={showDuplicateDialog}
onOpenChange={setShowDuplicateDialog}
onOverwrite={handleOverwriteFile}
isLoading={fileUploading}
/>
</> </>
); );
} }

View file

@ -9,7 +9,7 @@ export default function IBMLogo(props: React.SVGProps<SVGSVGElement>) {
{...props} {...props}
> >
<title>IBM watsonx.ai Logo</title> <title>IBM watsonx.ai Logo</title>
<g clip-path="url(#clip0_2620_2081)"> <g clipPath="url(#clip0_2620_2081)">
<path <path
d="M13 12.0007C12.4477 12.0007 12 12.4484 12 13.0007C12 13.0389 12.0071 13.0751 12.0112 13.1122C10.8708 14.0103 9.47165 14.5007 8 14.5007C5.86915 14.5007 4 12.5146 4 10.2507C4 7.90722 5.9065 6.00072 8.25 6.00072H8.5V5.00072H8.25C5.3552 5.00072 3 7.35592 3 10.2507C3 11.1927 3.2652 12.0955 3.71855 12.879C2.3619 11.6868 1.5 9.94447 1.5 8.00072C1.5 6.94312 1.74585 5.93432 2.23095 5.00292L1.34375 4.54102C0.79175 5.60157 0.5 6.79787 0.5 8.00072C0.5 12.1362 3.8645 15.5007 8 15.5007C9.6872 15.5007 11.2909 14.9411 12.6024 13.9176C12.7244 13.9706 12.8586 14.0007 13 14.0007C13.5523 14.0007 14 13.553 14 13.0007C14 12.4484 13.5523 12.0007 13 12.0007Z" d="M13 12.0007C12.4477 12.0007 12 12.4484 12 13.0007C12 13.0389 12.0071 13.0751 12.0112 13.1122C10.8708 14.0103 9.47165 14.5007 8 14.5007C5.86915 14.5007 4 12.5146 4 10.2507C4 7.90722 5.9065 6.00072 8.25 6.00072H8.5V5.00072H8.25C5.3552 5.00072 3 7.35592 3 10.2507C3 11.1927 3.2652 12.0955 3.71855 12.879C2.3619 11.6868 1.5 9.94447 1.5 8.00072C1.5 6.94312 1.74585 5.93432 2.23095 5.00292L1.34375 4.54102C0.79175 5.60157 0.5 6.79787 0.5 8.00072C0.5 12.1362 3.8645 15.5007 8 15.5007C9.6872 15.5007 11.2909 14.9411 12.6024 13.9176C12.7244 13.9706 12.8586 14.0007 13 14.0007C13.5523 14.0007 14 13.553 14 13.0007C14 12.4484 13.5523 12.0007 13 12.0007Z"
fill="currentColor" fill="currentColor"

View file

@ -44,7 +44,7 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
placeholder={placeholder} placeholder={placeholder}
className={cn( className={cn(
"primary-input", "primary-input",
icon && "pl-9", icon && "!pl-9",
type === "password" && "!pr-8", type === "password" && "!pr-8",
icon ? inputClassName : className icon ? inputClassName : className
)} )}

View file

@ -0,0 +1,47 @@
import {
type UseMutationOptions,
useMutation,
useQueryClient,
} from "@tanstack/react-query";
export interface CancelTaskRequest {
taskId: string;
}
export interface CancelTaskResponse {
status: string;
task_id: string;
}
export const useCancelTaskMutation = (
options?: Omit<
UseMutationOptions<CancelTaskResponse, Error, CancelTaskRequest>,
"mutationFn"
>
) => {
const queryClient = useQueryClient();
async function cancelTask(
variables: CancelTaskRequest,
): Promise<CancelTaskResponse> {
const response = await fetch(`/api/tasks/${variables.taskId}/cancel`, {
method: "POST",
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.error || "Failed to cancel task");
}
return response.json();
}
return useMutation({
mutationFn: cancelTask,
onSuccess: () => {
// Invalidate tasks query to refresh the list
queryClient.invalidateQueries({ queryKey: ["tasks"] });
},
...options,
});
};

View file

@ -0,0 +1,79 @@
import {
type UseQueryOptions,
useQuery,
useQueryClient,
} from "@tanstack/react-query";
export interface Task {
task_id: string;
status:
| "pending"
| "running"
| "processing"
| "completed"
| "failed"
| "error";
total_files?: number;
processed_files?: number;
successful_files?: number;
failed_files?: number;
running_files?: number;
pending_files?: number;
created_at: string;
updated_at: string;
duration_seconds?: number;
result?: Record<string, unknown>;
error?: string;
files?: Record<string, Record<string, unknown>>;
}
export interface TasksResponse {
tasks: Task[];
}
export const useGetTasksQuery = (
options?: Omit<UseQueryOptions<Task[]>, "queryKey" | "queryFn">
) => {
const queryClient = useQueryClient();
async function getTasks(): Promise<Task[]> {
const response = await fetch("/api/tasks");
if (!response.ok) {
throw new Error("Failed to fetch tasks");
}
const data: TasksResponse = await response.json();
return data.tasks || [];
}
const queryResult = useQuery(
{
queryKey: ["tasks"],
queryFn: getTasks,
refetchInterval: (query) => {
// Only poll if there are tasks with pending or running status
const data = query.state.data;
if (!data || data.length === 0) {
return false; // Stop polling if no tasks
}
const hasActiveTasks = data.some(
(task: Task) =>
task.status === "pending" ||
task.status === "running" ||
task.status === "processing"
);
return hasActiveTasks ? 3000 : false; // Poll every 3 seconds if active tasks exist
},
refetchIntervalInBackground: true,
staleTime: 0, // Always consider data stale to ensure fresh updates
gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
...options,
},
queryClient,
);
return queryResult;
};

View file

@ -1,10 +1,16 @@
"use client"; "use client";
import { ArrowLeft, Check, Copy, Loader2, Search, X } from "lucide-react"; import { ArrowLeft, Check, Copy, Loader2, Search, X } from "lucide-react";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation"; import { useRouter, useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
// import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { ProtectedRoute } from "@/components/protected-route"; import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
import { import {
@ -42,7 +48,7 @@ function ChunksPageContent() {
() => () =>
chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
chunks.length || 0, chunks.length || 0,
[chunks] [chunks],
); );
// const [selectAll, setSelectAll] = useState(false); // const [selectAll, setSelectAll] = useState(false);
@ -61,7 +67,7 @@ function ChunksPageContent() {
}, []); }, []);
const fileData = (data as File[]).find( const fileData = (data as File[]).find(
(file: File) => file.filename === filename (file: File) => file.filename === filename,
); );
// Extract chunks for the specific file // Extract chunks for the specific file
@ -72,18 +78,18 @@ function ChunksPageContent() {
} }
setChunks( setChunks(
fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || [] fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || [],
); );
}, [data, filename]); }, [data, filename]);
// Set selected state for all checkboxes when selectAll changes // Set selected state for all checkboxes when selectAll changes
// useEffect(() => { useEffect(() => {
// if (selectAll) { if (selectAll) {
// setSelectedChunks(new Set(chunks.map((_, index) => index))); setSelectedChunks(new Set(chunks.map((_, index) => index)));
// } else { } else {
// setSelectedChunks(new Set()); setSelectedChunks(new Set());
// } }
// }, [selectAll, setSelectedChunks, chunks]); }, [selectAll, setSelectedChunks, chunks]);
const handleBack = useCallback(() => { const handleBack = useCallback(() => {
router.push("/knowledge"); router.push("/knowledge");

View file

@ -1,71 +1,75 @@
"use client"; "use client";
import { themeQuartz, type ColDef } from "ag-grid-community"; import type { ColDef, GetRowIdParams } from "ag-grid-community";
import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react"; import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
import { Cloud, FileIcon } from "lucide-react"; import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react";
import { useRouter } from "next/navigation"; import { useRouter } from "next/navigation";
import { import {
type ChangeEvent,
useCallback, useCallback,
useEffect,
useRef, useRef,
useState, useState,
} from "react"; } from "react";
import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb";
import { KnowledgeDropdown } from "@/components/knowledge-dropdown"; import { KnowledgeDropdown } from "@/components/knowledge-dropdown";
import { ProtectedRoute } from "@/components/protected-route"; import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useLayout } from "@/contexts/layout-context";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery"; import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery";
import "@/components/AgGrid/registerAgGridModules"; import "@/components/AgGrid/registerAgGridModules";
import "@/components/AgGrid/agGridStyles.css"; import "@/components/AgGrid/agGridStyles.css";
import { toast } from "sonner"; import { toast } from "sonner";
import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown"; import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { StatusBadge } from "@/components/ui/status-badge"; import { StatusBadge } from "@/components/ui/status-badge";
import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog"; import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog";
import { useDeleteDocument } from "../api/mutations/useDeleteDocument"; import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
import GoogleDriveIcon from "../settings/icons/google-drive-icon";
import OneDriveIcon from "../settings/icons/one-drive-icon";
import SharePointIcon from "../settings/icons/share-point-icon";
import { KnowledgeSearchInput } from "@/components/knowledge-search-input";
// Function to get the appropriate icon for a connector type // Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) { function getSourceIcon(connectorType?: string) {
switch (connectorType) { switch (connectorType) {
case "google_drive": case "google_drive":
return ( return (
<GoogleDriveIcon className="h-4 w-4 text-foreground flex-shrink-0" /> <SiGoogledrive className="h-4 w-4 text-foreground flex-shrink-0" />
); );
case "onedrive": case "onedrive":
return <OneDriveIcon className="h-4 w-4 text-foreground flex-shrink-0" />;
case "sharepoint":
return ( return (
<SharePointIcon className="h-4 w-4 text-foreground flex-shrink-0" /> <TbBrandOnedrive className="h-4 w-4 text-foreground flex-shrink-0" />
); );
case "sharepoint":
return <Building2 className="h-4 w-4 text-foreground flex-shrink-0" />;
case "s3": case "s3":
return <Cloud className="h-4 w-4 text-foreground flex-shrink-0" />; return <Cloud className="h-4 w-4 text-foreground flex-shrink-0" />;
default: default:
return ( return (
<FileIcon className="h-4 w-4 text-muted-foreground flex-shrink-0" /> <HardDrive className="h-4 w-4 text-muted-foreground flex-shrink-0" />
); );
} }
} }
function SearchPage() { function SearchPage() {
const router = useRouter(); const router = useRouter();
const { files: taskFiles } = useTask(); const { isMenuOpen, files: taskFiles, refreshTasks } = useTask();
const { const { totalTopOffset } = useLayout();
parsedFilterData, const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
queryOverride, useKnowledgeFilter();
} = useKnowledgeFilter();
const [selectedRows, setSelectedRows] = useState<File[]>([]); const [selectedRows, setSelectedRows] = useState<File[]>([]);
const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false); const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false);
const deleteDocumentMutation = useDeleteDocument(); const deleteDocumentMutation = useDeleteDocument();
const { data = [], isFetching } = useGetSearchQuery( useEffect(() => {
queryOverride, refreshTasks();
parsedFilterData }, [refreshTasks]);
);
const { data: searchData = [], isFetching } = useGetSearchQuery(
parsedFilterData?.query || "*",
parsedFilterData,
);
// Convert TaskFiles to File format and merge with backend results // Convert TaskFiles to File format and merge with backend results
const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
return { return {
@ -78,13 +82,32 @@ function SearchPage() {
}; };
}); });
const backendFiles = data as File[]; // Create a map of task files by filename for quick lookup
const taskFileMap = new Map(
taskFilesAsFiles.map((file) => [file.filename, file]),
);
// Override backend files with task file status if they exist
const backendFiles = (searchData as File[])
.map((file) => {
const taskFile = taskFileMap.get(file.filename);
if (taskFile) {
// Override backend file with task file data (includes status)
return { ...file, ...taskFile };
}
return file;
})
.filter((file) => {
// Only filter out files that are currently processing AND in taskFiles
const taskFile = taskFileMap.get(file.filename);
return !taskFile || taskFile.status !== "processing";
});
const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
return ( return (
taskFile.status !== "active" && taskFile.status !== "active" &&
!backendFiles.some( !backendFiles.some(
(backendFile) => backendFile.filename === taskFile.filename (backendFile) => backendFile.filename === taskFile.filename,
) )
); );
}); });
@ -92,26 +115,42 @@ function SearchPage() {
// Combine task files first, then backend files // Combine task files first, then backend files
const fileResults = [...backendFiles, ...filteredTaskFiles]; const fileResults = [...backendFiles, ...filteredTaskFiles];
const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
};
const gridRef = useRef<AgGridReact>(null); const gridRef = useRef<AgGridReact>(null);
const [columnDefs] = useState<ColDef<File>[]>([ const columnDefs = [
{ {
field: "filename", field: "filename",
headerName: "Source", headerName: "Source",
checkboxSelection: true, checkboxSelection: (params: CustomCellRendererProps<File>) =>
(params?.data?.status || "active") === "active",
headerCheckboxSelection: true, headerCheckboxSelection: true,
initialFlex: 2, initialFlex: 2,
minWidth: 220, minWidth: 220,
cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => { cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => {
// Read status directly from data on each render
const status = data?.status || "active";
const isActive = status === "active";
console.log(data?.filename, status, "a");
return ( return (
<div className="flex items-center overflow-hidden w-full">
<div
className={`transition-opacity duration-200 ${isActive ? "w-0" : "w-7"}`}
></div>
<button <button
type="button" type="button"
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left w-full" className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left flex-1 overflow-hidden"
onClick={() => { onClick={() => {
if (!isActive) {
return;
}
router.push( router.push(
`/knowledge/chunks?filename=${encodeURIComponent( `/knowledge/chunks?filename=${encodeURIComponent(
data?.filename ?? "" data?.filename ?? "",
)}` )}`,
); );
}} }}
> >
@ -120,13 +159,14 @@ function SearchPage() {
{value} {value}
</span> </span>
</button> </button>
</div>
); );
}, },
}, },
{ {
field: "size", field: "size",
headerName: "Size", headerName: "Size",
valueFormatter: (params) => valueFormatter: (params: CustomCellRendererProps<File>) =>
params.value ? `${Math.round(params.value / 1024)} KB` : "-", params.value ? `${Math.round(params.value / 1024)} KB` : "-",
}, },
{ {
@ -136,17 +176,18 @@ function SearchPage() {
{ {
field: "owner", field: "owner",
headerName: "Owner", headerName: "Owner",
valueFormatter: (params) => valueFormatter: (params: CustomCellRendererProps<File>) =>
params.data?.owner_name || params.data?.owner_email || "—", params.data?.owner_name || params.data?.owner_email || "—",
}, },
{ {
field: "chunkCount", field: "chunkCount",
headerName: "Chunks", headerName: "Chunks",
valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", valueFormatter: (params: CustomCellRendererProps<File>) => params.data?.chunkCount?.toString() || "-",
}, },
{ {
field: "avgScore", field: "avgScore",
headerName: "Avg score", headerName: "Avg score",
initialFlex: 0.5,
cellRenderer: ({ value }: CustomCellRendererProps<File>) => { cellRenderer: ({ value }: CustomCellRendererProps<File>) => {
return ( return (
<span className="text-xs text-accent-emerald-foreground bg-accent-emerald px-2 py-1 rounded"> <span className="text-xs text-accent-emerald-foreground bg-accent-emerald px-2 py-1 rounded">
@ -159,6 +200,7 @@ function SearchPage() {
field: "status", field: "status",
headerName: "Status", headerName: "Status",
cellRenderer: ({ data }: CustomCellRendererProps<File>) => { cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
console.log(data?.filename, data?.status, "b");
// Default to 'active' status if no status is provided // Default to 'active' status if no status is provided
const status = data?.status || "active"; const status = data?.status || "active";
return <StatusBadge status={status} />; return <StatusBadge status={status} />;
@ -166,6 +208,10 @@ function SearchPage() {
}, },
{ {
cellRenderer: ({ data }: CustomCellRendererProps<File>) => { cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
const status = data?.status || "active";
if (status !== "active") {
return null;
}
return <KnowledgeActionsDropdown filename={data?.filename || ""} />; return <KnowledgeActionsDropdown filename={data?.filename || ""} />;
}, },
cellStyle: { cellStyle: {
@ -182,7 +228,7 @@ function SearchPage() {
sortable: false, sortable: false,
initialFlex: 0, initialFlex: 0,
}, },
]); ];
const defaultColDef: ColDef<File> = { const defaultColDef: ColDef<File> = {
resizable: false, resizable: false,
@ -204,7 +250,7 @@ function SearchPage() {
try { try {
// Delete each file individually since the API expects one filename at a time // Delete each file individually since the API expects one filename at a time
const deletePromises = selectedRows.map((row) => const deletePromises = selectedRows.map((row) =>
deleteDocumentMutation.mutateAsync({ filename: row.filename }) deleteDocumentMutation.mutateAsync({ filename: row.filename }),
); );
await Promise.all(deletePromises); await Promise.all(deletePromises);
@ -212,7 +258,7 @@ function SearchPage() {
toast.success( toast.success(
`Successfully deleted ${selectedRows.length} document${ `Successfully deleted ${selectedRows.length} document${
selectedRows.length > 1 ? "s" : "" selectedRows.length > 1 ? "s" : ""
}` }`,
); );
setSelectedRows([]); setSelectedRows([]);
setShowBulkDeleteDialog(false); setShowBulkDeleteDialog(false);
@ -225,21 +271,74 @@ function SearchPage() {
toast.error( toast.error(
error instanceof Error error instanceof Error
? error.message ? error.message
: "Failed to delete some documents" : "Failed to delete some documents",
); );
} }
}; };
return ( return (
<> <div
<div className="flex flex-col h-full"> className={`fixed inset-0 md:left-72 flex flex-col transition-all duration-300 ${
isMenuOpen && isPanelOpen
? "md:right-[704px]"
: // Both open: 384px (menu) + 320px (KF panel)
isMenuOpen
? "md:right-96"
: // Only menu open: 384px
isPanelOpen
? "md:right-80"
: // Only KF panel open: 320px
"md:right-6" // Neither open: 24px
}`}
style={{ top: `${totalTopOffset}px` }}
>
<div className="flex-1 flex flex-col min-h-0 px-6 py-6">
<div className="flex items-center justify-between mb-6"> <div className="flex items-center justify-between mb-6">
<h2 className="text-lg font-semibold">Knowledge</h2> <h2 className="text-lg font-semibold">Project Knowledge</h2>
<KnowledgeDropdown variant="button" />
</div> </div>
{/* Search Input Area */} {/* Search Input Area */}
<div className="flex-1 flex flex-shrink-0 flex-wrap-reverse gap-3 mb-6"> <div className="flex-shrink-0 mb-6 xl:max-w-[75%]">
<KnowledgeSearchInput /> <form className="flex gap-3">
<div className="primary-input min-h-10 !flex items-center flex-nowrap focus-within:border-foreground transition-colors !p-[0.3rem]">
{selectedFilter?.name && (
<div
className={`flex items-center gap-1 h-full px-1.5 py-0.5 mr-1 rounded max-w-[25%] ${
filterAccentClasses[parsedFilterData?.color || "zinc"]
}`}
>
<span className="truncate">{selectedFilter?.name}</span>
<X
aria-label="Remove filter"
className="h-4 w-4 flex-shrink-0 cursor-pointer"
onClick={() => setSelectedFilter(null)}
/>
</div>
)}
<Search
className="h-4 w-4 ml-1 flex-shrink-0 text-placeholder-foreground"
/>
<input
className="bg-transparent w-full h-full ml-2 focus:outline-none focus-visible:outline-none font-mono placeholder:font-mono"
name="search-query"
id="search-query"
type="text"
placeholder="Enter your search query..."
onChange={handleTableSearch}
/>
</div>
{/* <Button
type="submit"
variant="outline"
className="rounded-lg p-0 flex-shrink-0"
>
{isFetching ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<Search className="h-4 w-4" />
)}
</Button> */}
{/* //TODO: Implement sync button */} {/* //TODO: Implement sync button */}
{/* <Button {/* <Button
type="button" type="button"
@ -256,16 +355,14 @@ function SearchPage() {
className="rounded-lg flex-shrink-0" className="rounded-lg flex-shrink-0"
onClick={() => setShowBulkDeleteDialog(true)} onClick={() => setShowBulkDeleteDialog(true)}
> >
Delete <Trash2 className="h-4 w-4" /> Delete
</Button> </Button>
)} )}
<div className="ml-auto"> </form>
<KnowledgeDropdown />
</div>
</div> </div>
<AgGridReact <AgGridReact
className="w-full overflow-auto" className="w-full overflow-auto"
columnDefs={columnDefs} columnDefs={columnDefs as ColDef<File>[]}
defaultColDef={defaultColDef} defaultColDef={defaultColDef}
loading={isFetching} loading={isFetching}
ref={gridRef} ref={gridRef}
@ -273,9 +370,8 @@ function SearchPage() {
rowSelection="multiple" rowSelection="multiple"
rowMultiSelectWithClick={false} rowMultiSelectWithClick={false}
suppressRowClickSelection={true} suppressRowClickSelection={true}
getRowId={(params) => params.data.filename} getRowId={(params: GetRowIdParams<File>) => params.data?.filename}
domLayout="normal" domLayout="normal"
theme={themeQuartz.withParams({ browserColorScheme: "inherit" })}
onSelectionChanged={onSelectionChanged} onSelectionChanged={onSelectionChanged}
noRowsOverlayComponent={() => ( noRowsOverlayComponent={() => (
<div className="text-center pb-[45px]"> <div className="text-center pb-[45px]">
@ -299,12 +395,15 @@ function SearchPage() {
selectedRows.length selectedRows.length
} document${ } document${
selectedRows.length > 1 ? "s" : "" selectedRows.length > 1 ? "s" : ""
}? This will remove all chunks and data associated with these documents. This action cannot be undone.`} }? This will remove all chunks and data associated with these documents. This action cannot be undone.
Documents to be deleted:
${selectedRows.map((row) => `${row.filename}`).join("\n")}`}
confirmText="Delete All" confirmText="Delete All"
onConfirm={handleBulkDelete} onConfirm={handleBulkDelete}
isLoading={deleteDocumentMutation.isPending} isLoading={deleteDocumentMutation.isPending}
/> />
</> </div>
); );
} }

View file

@ -1,6 +1,6 @@
"use client" "use client"
import { useState } from 'react' import { useEffect, useState } from 'react'
import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react' import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
@ -8,9 +8,16 @@ import { Badge } from '@/components/ui/badge'
import { useTask, Task } from '@/contexts/task-context' import { useTask, Task } from '@/contexts/task-context'
export function TaskNotificationMenu() { export function TaskNotificationMenu() {
const { tasks, isFetching, isMenuOpen, cancelTask } = useTask() const { tasks, isFetching, isMenuOpen, isRecentTasksExpanded, cancelTask } = useTask()
const [isExpanded, setIsExpanded] = useState(false) const [isExpanded, setIsExpanded] = useState(false)
// Sync local state with context state
useEffect(() => {
if (isRecentTasksExpanded) {
setIsExpanded(true)
}
}, [isRecentTasksExpanded])
// Don't render if menu is closed // Don't render if menu is closed
if (!isMenuOpen) return null if (!isMenuOpen) return null

View file

@ -49,7 +49,9 @@ export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
className || "" className || ""
}`} }`}
> >
{status === "processing" && <AnimatedProcessingIcon className="mr-1.5" />} {status === "processing" && (
<AnimatedProcessingIcon className="text-current shrink-0" />
)}
{config.label} {config.label}
</div> </div>
); );

View file

@ -7,33 +7,18 @@ import {
useCallback, useCallback,
useContext, useContext,
useEffect, useEffect,
useRef,
useState, useState,
} from "react"; } from "react";
import { toast } from "sonner"; import { toast } from "sonner";
import { useCancelTaskMutation } from "@/app/api/mutations/useCancelTaskMutation";
import {
type Task,
useGetTasksQuery,
} from "@/app/api/queries/useGetTasksQuery";
import { useAuth } from "@/contexts/auth-context"; import { useAuth } from "@/contexts/auth-context";
export interface Task { // Task interface is now imported from useGetTasksQuery
task_id: string;
status:
| "pending"
| "running"
| "processing"
| "completed"
| "failed"
| "error";
total_files?: number;
processed_files?: number;
successful_files?: number;
failed_files?: number;
running_files?: number;
pending_files?: number;
created_at: string;
updated_at: string;
duration_seconds?: number;
result?: Record<string, unknown>;
error?: string;
files?: Record<string, Record<string, unknown>>;
}
export interface TaskFile { export interface TaskFile {
filename: string; filename: string;
@ -51,27 +36,54 @@ interface TaskContextType {
files: TaskFile[]; files: TaskFile[];
addTask: (taskId: string) => void; addTask: (taskId: string) => void;
addFiles: (files: Partial<TaskFile>[], taskId: string) => void; addFiles: (files: Partial<TaskFile>[], taskId: string) => void;
removeTask: (taskId: string) => void;
refreshTasks: () => Promise<void>; refreshTasks: () => Promise<void>;
cancelTask: (taskId: string) => Promise<void>; cancelTask: (taskId: string) => Promise<void>;
isPolling: boolean; isPolling: boolean;
isFetching: boolean; isFetching: boolean;
isMenuOpen: boolean; isMenuOpen: boolean;
toggleMenu: () => void; toggleMenu: () => void;
isRecentTasksExpanded: boolean;
setRecentTasksExpanded: (expanded: boolean) => void;
// React Query states
isLoading: boolean;
error: Error | null;
} }
const TaskContext = createContext<TaskContextType | undefined>(undefined); const TaskContext = createContext<TaskContextType | undefined>(undefined);
export function TaskProvider({ children }: { children: React.ReactNode }) { export function TaskProvider({ children }: { children: React.ReactNode }) {
const [tasks, setTasks] = useState<Task[]>([]);
const [files, setFiles] = useState<TaskFile[]>([]); const [files, setFiles] = useState<TaskFile[]>([]);
const [isPolling, setIsPolling] = useState(false);
const [isFetching, setIsFetching] = useState(false);
const [isMenuOpen, setIsMenuOpen] = useState(false); const [isMenuOpen, setIsMenuOpen] = useState(false);
const [isRecentTasksExpanded, setIsRecentTasksExpanded] = useState(false);
const previousTasksRef = useRef<Task[]>([]);
const { isAuthenticated, isNoAuthMode } = useAuth(); const { isAuthenticated, isNoAuthMode } = useAuth();
const queryClient = useQueryClient(); const queryClient = useQueryClient();
// Use React Query hooks
const {
data: tasks = [],
isLoading,
error,
refetch: refetchTasks,
isFetching,
} = useGetTasksQuery({
enabled: isAuthenticated || isNoAuthMode,
});
const cancelTaskMutation = useCancelTaskMutation({
onSuccess: () => {
toast.success("Task cancelled", {
description: "Task has been cancelled successfully",
});
},
onError: (error) => {
toast.error("Failed to cancel task", {
description: error.message,
});
},
});
const refetchSearch = useCallback(() => { const refetchSearch = useCallback(() => {
queryClient.invalidateQueries({ queryClient.invalidateQueries({
queryKey: ["search"], queryKey: ["search"],
@ -99,33 +111,37 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
[], [],
); );
const fetchTasks = useCallback(async () => { // Handle task status changes and file updates
if (!isAuthenticated && !isNoAuthMode) return; useEffect(() => {
if (tasks.length === 0) {
// Store current tasks as previous for next comparison
previousTasksRef.current = tasks;
return;
}
setIsFetching(true); // Check for task status changes by comparing with previous tasks
try { tasks.forEach((currentTask) => {
const response = await fetch("/api/tasks"); const previousTask = previousTasksRef.current.find(
if (response.ok) { (prev) => prev.task_id === currentTask.task_id,
const data = await response.json();
const newTasks = data.tasks || [];
// Update tasks and check for status changes in the same state update
setTasks((prevTasks) => {
// Check for newly completed tasks to show toasts
if (prevTasks.length > 0) {
newTasks.forEach((newTask: Task) => {
const oldTask = prevTasks.find(
(t) => t.task_id === newTask.task_id,
); );
// Update or add files from task.files if available // Only show toasts if we have previous data and status has changed
if (newTask.files && typeof newTask.files === "object") { if (
const taskFileEntries = Object.entries(newTask.files); (previousTask && previousTask.status !== currentTask.status) ||
(!previousTask && previousTasksRef.current.length !== 0)
) {
// Process files from failed task and add them to files list
if (currentTask.files && typeof currentTask.files === "object") {
const taskFileEntries = Object.entries(currentTask.files);
const now = new Date().toISOString(); const now = new Date().toISOString();
taskFileEntries.forEach(([filePath, fileInfo]) => { taskFileEntries.forEach(([filePath, fileInfo]) => {
if (typeof fileInfo === "object" && fileInfo) { if (typeof fileInfo === "object" && fileInfo) {
const fileName = filePath.split("/").pop() || filePath; // Use the filename from backend if available, otherwise extract from path
const fileName =
(fileInfo as any).filename ||
filePath.split("/").pop() ||
filePath;
const fileStatus = fileInfo.status as string; const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status // Map backend file status to our TaskFile status
@ -149,7 +165,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const existingFileIndex = prevFiles.findIndex( const existingFileIndex = prevFiles.findIndex(
(f) => (f) =>
f.source_url === filePath && f.source_url === filePath &&
f.task_id === newTask.task_id, f.task_id === currentTask.task_id,
); );
// Detect connector type based on file path or other indicators // Detect connector type based on file path or other indicators
@ -166,7 +182,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
size: 0, // We don't have this info from the task size: 0, // We don't have this info from the task
connector_type: connectorType, connector_type: connectorType,
status: mappedStatus, status: mappedStatus,
task_id: newTask.task_id, task_id: currentTask.task_id,
created_at: created_at:
typeof fileInfo.created_at === "string" typeof fileInfo.created_at === "string"
? fileInfo.created_at ? fileInfo.created_at
@ -190,174 +206,121 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
} }
}); });
} }
if ( if (
oldTask && previousTask &&
oldTask.status !== "completed" && previousTask.status !== "completed" &&
newTask.status === "completed" currentTask.status === "completed"
) { ) {
// Task just completed - show success toast // Task just completed - show success toast with file counts
toast.success("Task completed successfully", { const successfulFiles = currentTask.successful_files || 0;
description: `Task ${newTask.task_id} has finished processing.`, const failedFiles = currentTask.failed_files || 0;
let description = "";
if (failedFiles > 0) {
description = `${successfulFiles} file${
successfulFiles !== 1 ? "s" : ""
} uploaded successfully, ${failedFiles} file${
failedFiles !== 1 ? "s" : ""
} failed`;
} else {
description = `${successfulFiles} file${
successfulFiles !== 1 ? "s" : ""
} uploaded successfully`;
}
toast.success("Task completed", {
description,
action: { action: {
label: "View", label: "View",
onClick: () => console.log("View task", newTask.task_id), onClick: () => {
setIsMenuOpen(true);
setIsRecentTasksExpanded(true);
},
}, },
}); });
refetchSearch(); setTimeout(() => {
// Dispatch knowledge updated event for all knowledge-related pages
console.log(
"Task completed successfully, dispatching knowledgeUpdated event",
);
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
// Remove files for this completed task from the files list
setFiles((prevFiles) => setFiles((prevFiles) =>
prevFiles.filter((file) => file.task_id !== newTask.task_id), prevFiles.filter(
(file) =>
file.task_id !== currentTask.task_id ||
file.status === "failed",
),
); );
refetchSearch();
}, 500);
} else if ( } else if (
oldTask && previousTask &&
oldTask.status !== "failed" && previousTask.status !== "failed" &&
oldTask.status !== "error" && previousTask.status !== "error" &&
(newTask.status === "failed" || newTask.status === "error") (currentTask.status === "failed" || currentTask.status === "error")
) { ) {
// Task just failed - show error toast // Task just failed - show error toast
toast.error("Task failed", { toast.error("Task failed", {
description: `Task ${newTask.task_id} failed: ${ description: `Task ${currentTask.task_id} failed: ${
newTask.error || "Unknown error" currentTask.error || "Unknown error"
}`, }`,
}); });
}
// Files will be updated to failed status by the file parsing logic above
} }
}); });
}
return newTasks; // Store current tasks as previous for next comparison
}); previousTasksRef.current = tasks;
} }, [tasks, refetchSearch]);
} catch (error) {
console.error("Failed to fetch tasks:", error);
} finally {
setIsFetching(false);
}
}, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop!
const addTask = useCallback((taskId: string) => { const addTask = useCallback(
// Immediately start aggressive polling for the new task (_taskId: string) => {
let pollAttempts = 0; // React Query will automatically handle polling when tasks are active
const maxPollAttempts = 30; // Poll for up to 30 seconds // Just trigger a refetch to get the latest data
setTimeout(() => {
const aggressivePoll = async () => { refetchTasks();
try { }, 500);
const response = await fetch("/api/tasks"); },
if (response.ok) { [refetchTasks],
const data = await response.json();
const newTasks = data.tasks || [];
const foundTask = newTasks.find(
(task: Task) => task.task_id === taskId,
); );
if (foundTask) {
// Task found! Update the tasks state
setTasks((prevTasks) => {
// Check if task is already in the list
const exists = prevTasks.some((t) => t.task_id === taskId);
if (!exists) {
return [...prevTasks, foundTask];
}
// Update existing task
return prevTasks.map((t) =>
t.task_id === taskId ? foundTask : t,
);
});
return; // Stop polling, we found it
}
}
} catch (error) {
console.error("Aggressive polling failed:", error);
}
pollAttempts++;
if (pollAttempts < maxPollAttempts) {
// Continue polling every 1 second for new tasks
setTimeout(aggressivePoll, 1000);
}
};
// Start aggressive polling after a short delay to allow backend to process
setTimeout(aggressivePoll, 500);
}, []);
const refreshTasks = useCallback(async () => { const refreshTasks = useCallback(async () => {
await fetchTasks(); setFiles([]);
}, [fetchTasks]); await refetchTasks();
}, [refetchTasks]);
const removeTask = useCallback((taskId: string) => {
setTasks((prev) => prev.filter((task) => task.task_id !== taskId));
}, []);
const cancelTask = useCallback( const cancelTask = useCallback(
async (taskId: string) => { async (taskId: string) => {
try { cancelTaskMutation.mutate({ taskId });
const response = await fetch(`/api/tasks/${taskId}/cancel`, {
method: "POST",
});
if (response.ok) {
// Immediately refresh tasks to show the updated status
await fetchTasks();
toast.success("Task cancelled", {
description: `Task ${taskId.substring(0, 8)}... has been cancelled`,
});
} else {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.error || "Failed to cancel task");
}
} catch (error) {
console.error("Failed to cancel task:", error);
toast.error("Failed to cancel task", {
description: error instanceof Error ? error.message : "Unknown error",
});
}
}, },
[fetchTasks], [cancelTaskMutation],
); );
const toggleMenu = useCallback(() => { const toggleMenu = useCallback(() => {
setIsMenuOpen((prev) => !prev); setIsMenuOpen((prev) => !prev);
}, []); }, []);
// Periodic polling for task updates // Determine if we're polling based on React Query's refetch interval
useEffect(() => { const isPolling =
if (!isAuthenticated && !isNoAuthMode) return; isFetching &&
tasks.some(
setIsPolling(true); (task) =>
task.status === "pending" ||
// Initial fetch task.status === "running" ||
fetchTasks(); task.status === "processing",
);
// Set up polling interval - every 3 seconds (more responsive for active tasks)
const interval = setInterval(fetchTasks, 3000);
return () => {
clearInterval(interval);
setIsPolling(false);
};
}, [isAuthenticated, isNoAuthMode, fetchTasks]);
const value: TaskContextType = { const value: TaskContextType = {
tasks, tasks,
files, files,
addTask, addTask,
addFiles, addFiles,
removeTask,
refreshTasks, refreshTasks,
cancelTask, cancelTask,
isPolling, isPolling,
isFetching, isFetching,
isMenuOpen, isMenuOpen,
toggleMenu, toggleMenu,
isRecentTasksExpanded,
setRecentTasksExpanded: setIsRecentTasksExpanded,
isLoading,
error,
}; };
return <TaskContext.Provider value={value}>{children}</TaskContext.Provider>; return <TaskContext.Provider value={value}>{children}</TaskContext.Provider>;

View file

@ -6,6 +6,54 @@ from config.settings import INDEX_NAME
logger = get_logger(__name__) logger = get_logger(__name__)
async def check_filename_exists(request: Request, document_service, session_manager):
"""Check if a document with a specific filename already exists"""
filename = request.query_params.get("filename")
if not filename:
return JSONResponse({"error": "filename parameter is required"}, status_code=400)
user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
try:
# Get user's OpenSearch client
opensearch_client = session_manager.get_user_opensearch_client(
user.user_id, jwt_token
)
# Search for any document with this exact filename
from utils.opensearch_queries import build_filename_search_body
search_body = build_filename_search_body(filename, size=1, source=["filename"])
logger.debug(f"Checking filename existence: {filename}")
response = await opensearch_client.search(
index=INDEX_NAME,
body=search_body
)
# Check if any hits were found
hits = response.get("hits", {}).get("hits", [])
exists = len(hits) > 0
logger.debug(f"Filename check result - exists: {exists}, hits: {len(hits)}")
return JSONResponse({
"exists": exists,
"filename": filename
}, status_code=200)
except Exception as e:
logger.error("Error checking filename existence", filename=filename, error=str(e))
error_str = str(e)
if "AuthenticationException" in error_str:
return JSONResponse({"error": "Access denied: insufficient permissions"}, status_code=403)
else:
return JSONResponse({"error": str(e)}, status_code=500)
async def delete_documents_by_filename(request: Request, document_service, session_manager): async def delete_documents_by_filename(request: Request, document_service, session_manager):
"""Delete all documents with a specific filename""" """Delete all documents with a specific filename"""
data = await request.json() data = await request.json()
@ -24,15 +72,11 @@ async def delete_documents_by_filename(request: Request, document_service, sessi
) )
# Delete by query to remove all chunks of this document # Delete by query to remove all chunks of this document
delete_query = { from utils.opensearch_queries import build_filename_delete_body
"query": {
"bool": { delete_query = build_filename_delete_body(filename)
"must": [
{"term": {"filename": filename}} logger.debug(f"Deleting documents with filename: {filename}")
]
}
}
}
result = await opensearch_client.delete_by_query( result = await opensearch_client.delete_by_query(
index=INDEX_NAME, index=INDEX_NAME,

View file

@ -193,15 +193,16 @@ async def upload_and_ingest_user_file(
# Read file content # Read file content
content = await upload_file.read() content = await upload_file.read()
# Create temporary file # Create temporary file with the actual filename (not a temp prefix)
# Store in temp directory but use the real filename
temp_dir = tempfile.gettempdir()
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
temp_fd, temp_path = tempfile.mkstemp( temp_path = os.path.join(temp_dir, safe_filename)
suffix=f"_{safe_filename}"
)
try: try:
# Write content to temp file # Write content to temp file
with os.fdopen(temp_fd, 'wb') as temp_file: with open(temp_path, 'wb') as temp_file:
temp_file.write(content) temp_file.write(content)
logger.debug("Created temporary file for task processing", temp_path=temp_path) logger.debug("Created temporary file for task processing", temp_path=temp_path)

View file

@ -17,7 +17,7 @@ async def upload_ingest_router(
document_service=None, document_service=None,
langflow_file_service=None, langflow_file_service=None,
session_manager=None, session_manager=None,
task_service=None task_service=None,
): ):
""" """
Router endpoint that automatically routes upload requests based on configuration. Router endpoint that automatically routes upload requests based on configuration.
@ -31,7 +31,7 @@ async def upload_ingest_router(
try: try:
logger.debug( logger.debug(
"Router upload_ingest endpoint called", "Router upload_ingest endpoint called",
disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW,
) )
# Route based on configuration # Route based on configuration
@ -42,7 +42,9 @@ async def upload_ingest_router(
else: else:
# Route to Langflow upload and ingest using task service # Route to Langflow upload and ingest using task service
logger.debug("Routing to Langflow upload-ingest pipeline via task service") logger.debug("Routing to Langflow upload-ingest pipeline via task service")
return await langflow_upload_ingest_task(request, langflow_file_service, session_manager, task_service) return await langflow_upload_ingest_task(
request, langflow_file_service, session_manager, task_service
)
except Exception as e: except Exception as e:
logger.error("Error in upload_ingest_router", error=str(e)) logger.error("Error in upload_ingest_router", error=str(e))
@ -57,10 +59,7 @@ async def upload_ingest_router(
async def langflow_upload_ingest_task( async def langflow_upload_ingest_task(
request: Request, request: Request, langflow_file_service, session_manager, task_service
langflow_file_service,
session_manager,
task_service
): ):
"""Task-based langflow upload and ingest for single/multiple files""" """Task-based langflow upload and ingest for single/multiple files"""
try: try:
@ -77,6 +76,7 @@ async def langflow_upload_ingest_task(
settings_json = form.get("settings") settings_json = form.get("settings")
tweaks_json = form.get("tweaks") tweaks_json = form.get("tweaks")
delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true" delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true"
replace_duplicates = form.get("replace_duplicates", "false").lower() == "true"
# Parse JSON fields if provided # Parse JSON fields if provided
settings = None settings = None
@ -85,6 +85,7 @@ async def langflow_upload_ingest_task(
if settings_json: if settings_json:
try: try:
import json import json
settings = json.loads(settings_json) settings = json.loads(settings_json)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.error("Invalid settings JSON", error=str(e)) logger.error("Invalid settings JSON", error=str(e))
@ -93,6 +94,7 @@ async def langflow_upload_ingest_task(
if tweaks_json: if tweaks_json:
try: try:
import json import json
tweaks = json.loads(tweaks_json) tweaks = json.loads(tweaks_json)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.error("Invalid tweaks JSON", error=str(e)) logger.error("Invalid tweaks JSON", error=str(e))
@ -106,26 +108,35 @@ async def langflow_upload_ingest_task(
jwt_token = getattr(request.state, "jwt_token", None) jwt_token = getattr(request.state, "jwt_token", None)
if not user_id: if not user_id:
return JSONResponse({"error": "User authentication required"}, status_code=401) return JSONResponse(
{"error": "User authentication required"}, status_code=401
)
# Create temporary files for task processing # Create temporary files for task processing
import tempfile import tempfile
import os import os
temp_file_paths = [] temp_file_paths = []
original_filenames = []
try: try:
# Create temp directory reference once
temp_dir = tempfile.gettempdir()
for upload_file in upload_files: for upload_file in upload_files:
# Read file content # Read file content
content = await upload_file.read() content = await upload_file.read()
# Create temporary file # Store ORIGINAL filename (not transformed)
original_filenames.append(upload_file.filename)
# Create temporary file with TRANSFORMED filename for filesystem safety
# Transform: spaces and / to underscore
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
temp_fd, temp_path = tempfile.mkstemp( temp_path = os.path.join(temp_dir, safe_filename)
suffix=f"_{safe_filename}"
)
# Write content to temp file # Write content to temp file
with os.fdopen(temp_fd, 'wb') as temp_file: with open(temp_path, "wb") as temp_file:
temp_file.write(content) temp_file.write(content)
temp_file_paths.append(temp_path) temp_file_paths.append(temp_path)
@ -136,21 +147,22 @@ async def langflow_upload_ingest_task(
user_id=user_id, user_id=user_id,
has_settings=bool(settings), has_settings=bool(settings),
has_tweaks=bool(tweaks), has_tweaks=bool(tweaks),
delete_after_ingest=delete_after_ingest delete_after_ingest=delete_after_ingest,
) )
# Create langflow upload task # Create langflow upload task
print(f"tweaks: {tweaks}") logger.debug(
print(f"settings: {settings}") f"Preparing to create langflow upload task: tweaks={tweaks}, settings={settings}, jwt_token={jwt_token}, user_name={user_name}, user_email={user_email}, session_id={session_id}, delete_after_ingest={delete_after_ingest}, temp_file_paths={temp_file_paths}",
print(f"jwt_token: {jwt_token}") )
print(f"user_name: {user_name}") # Create a map between temp_file_paths and original_filenames
print(f"user_email: {user_email}") file_path_to_original_filename = dict(zip(temp_file_paths, original_filenames))
print(f"session_id: {session_id}") logger.debug(
print(f"delete_after_ingest: {delete_after_ingest}") f"File path to original filename map: {file_path_to_original_filename}",
print(f"temp_file_paths: {temp_file_paths}") )
task_id = await task_service.create_langflow_upload_task( task_id = await task_service.create_langflow_upload_task(
user_id=user_id, user_id=user_id,
file_paths=temp_file_paths, file_paths=temp_file_paths,
original_filenames=file_path_to_original_filename,
langflow_file_service=langflow_file_service, langflow_file_service=langflow_file_service,
session_manager=session_manager, session_manager=session_manager,
jwt_token=jwt_token, jwt_token=jwt_token,
@ -160,19 +172,24 @@ async def langflow_upload_ingest_task(
tweaks=tweaks, tweaks=tweaks,
settings=settings, settings=settings,
delete_after_ingest=delete_after_ingest, delete_after_ingest=delete_after_ingest,
replace_duplicates=replace_duplicates,
) )
logger.debug("Langflow upload task created successfully", task_id=task_id) logger.debug("Langflow upload task created successfully", task_id=task_id)
return JSONResponse({ return JSONResponse(
{
"task_id": task_id, "task_id": task_id,
"message": f"Langflow upload task created for {len(upload_files)} file(s)", "message": f"Langflow upload task created for {len(upload_files)} file(s)",
"file_count": len(upload_files) "file_count": len(upload_files),
}, status_code=202) # 202 Accepted for async processing },
status_code=202,
) # 202 Accepted for async processing
except Exception: except Exception:
# Clean up temp files on error # Clean up temp files on error
from utils.file_utils import safe_unlink from utils.file_utils import safe_unlink
for temp_path in temp_file_paths: for temp_path in temp_file_paths:
safe_unlink(temp_path) safe_unlink(temp_path)
raise raise
@ -184,5 +201,6 @@ async def langflow_upload_ingest_task(
error=str(e), error=str(e),
) )
import traceback import traceback
logger.error("Full traceback", traceback=traceback.format_exc()) logger.error("Full traceback", traceback=traceback.format_exc())
return JSONResponse({"error": str(e)}, status_code=500) return JSONResponse({"error": str(e)}, status_code=500)

View file

@ -953,6 +953,17 @@ async def create_app():
methods=["POST", "GET"], methods=["POST", "GET"],
), ),
# Document endpoints # Document endpoints
Route(
"/documents/check-filename",
require_auth(services["session_manager"])(
partial(
documents.check_filename_exists,
document_service=services["document_service"],
session_manager=services["session_manager"],
)
),
methods=["GET"],
),
Route( Route(
"/documents/delete-by-filename", "/documents/delete-by-filename",
require_auth(services["session_manager"])( require_auth(services["session_manager"])(

View file

@ -55,6 +55,96 @@ class TaskProcessor:
await asyncio.sleep(retry_delay) await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff retry_delay *= 2 # Exponential backoff
async def check_filename_exists(
self,
filename: str,
opensearch_client,
) -> bool:
"""
Check if a document with the given filename already exists in OpenSearch.
Returns True if any chunks with this filename exist.
"""
from config.settings import INDEX_NAME
from utils.opensearch_queries import build_filename_search_body
import asyncio
max_retries = 3
retry_delay = 1.0
for attempt in range(max_retries):
try:
# Search for any document with this exact filename
search_body = build_filename_search_body(filename, size=1, source=False)
response = await opensearch_client.search(
index=INDEX_NAME,
body=search_body
)
# Check if any hits were found
hits = response.get("hits", {}).get("hits", [])
return len(hits) > 0
except (asyncio.TimeoutError, Exception) as e:
if attempt == max_retries - 1:
logger.error(
"OpenSearch filename check failed after retries",
filename=filename,
error=str(e),
attempt=attempt + 1
)
# On final failure, assume document doesn't exist (safer to reprocess than skip)
logger.warning(
"Assuming filename doesn't exist due to connection issues",
filename=filename
)
return False
else:
logger.warning(
"OpenSearch filename check failed, retrying",
filename=filename,
error=str(e),
attempt=attempt + 1,
retry_in=retry_delay
)
await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
async def delete_document_by_filename(
self,
filename: str,
opensearch_client,
) -> None:
"""
Delete all chunks of a document with the given filename from OpenSearch.
"""
from config.settings import INDEX_NAME
from utils.opensearch_queries import build_filename_delete_body
try:
# Delete all documents with this filename
delete_body = build_filename_delete_body(filename)
response = await opensearch_client.delete_by_query(
index=INDEX_NAME,
body=delete_body
)
deleted_count = response.get("deleted", 0)
logger.info(
"Deleted existing document chunks",
filename=filename,
deleted_count=deleted_count
)
except Exception as e:
logger.error(
"Failed to delete existing document",
filename=filename,
error=str(e)
)
raise
async def process_document_standard( async def process_document_standard(
self, self,
file_path: str, file_path: str,
@ -527,6 +617,7 @@ class LangflowFileProcessor(TaskProcessor):
tweaks: dict = None, tweaks: dict = None,
settings: dict = None, settings: dict = None,
delete_after_ingest: bool = True, delete_after_ingest: bool = True,
replace_duplicates: bool = False,
): ):
super().__init__() super().__init__()
self.langflow_file_service = langflow_file_service self.langflow_file_service = langflow_file_service
@ -539,6 +630,7 @@ class LangflowFileProcessor(TaskProcessor):
self.tweaks = tweaks or {} self.tweaks = tweaks or {}
self.settings = settings self.settings = settings
self.delete_after_ingest = delete_after_ingest self.delete_after_ingest = delete_after_ingest
self.replace_duplicates = replace_duplicates
async def process_item( async def process_item(
self, upload_task: UploadTask, item: str, file_task: FileTask self, upload_task: UploadTask, item: str, file_task: FileTask
@ -554,37 +646,40 @@ class LangflowFileProcessor(TaskProcessor):
file_task.updated_at = time.time() file_task.updated_at = time.time()
try: try:
# Compute hash and check if already exists # Use the ORIGINAL filename stored in file_task (not the transformed temp path)
from utils.hash_utils import hash_id # This ensures we check/store the original filename with spaces, etc.
file_hash = hash_id(item) original_filename = file_task.filename or os.path.basename(item)
# Check if document already exists # Check if document with same filename already exists
opensearch_client = self.session_manager.get_user_opensearch_client( opensearch_client = self.session_manager.get_user_opensearch_client(
self.owner_user_id, self.jwt_token self.owner_user_id, self.jwt_token
) )
if await self.check_document_exists(file_hash, opensearch_client):
file_task.status = TaskStatus.COMPLETED filename_exists = await self.check_filename_exists(original_filename, opensearch_client)
file_task.result = {"status": "unchanged", "id": file_hash}
if filename_exists and not self.replace_duplicates:
# Duplicate exists and user hasn't confirmed replacement
file_task.status = TaskStatus.FAILED
file_task.error = f"File with name '{original_filename}' already exists"
file_task.updated_at = time.time() file_task.updated_at = time.time()
upload_task.successful_files += 1 upload_task.failed_files += 1
return return
elif filename_exists and self.replace_duplicates:
# Delete existing document before uploading new one
logger.info(f"Replacing existing document: {original_filename}")
await self.delete_document_by_filename(original_filename, opensearch_client)
# Read file content for processing # Read file content for processing
with open(item, 'rb') as f: with open(item, 'rb') as f:
content = f.read() content = f.read()
# Create file tuple for upload # Create file tuple for upload using ORIGINAL filename
temp_filename = os.path.basename(item) # This ensures the document is indexed with the original name
# Extract original filename from temp file suffix (remove tmp prefix) content_type, _ = mimetypes.guess_type(original_filename)
if "_" in temp_filename:
filename = temp_filename.split("_", 1)[1] # Get everything after first _
else:
filename = temp_filename
content_type, _ = mimetypes.guess_type(filename)
if not content_type: if not content_type:
content_type = 'application/octet-stream' content_type = 'application/octet-stream'
file_tuple = (filename, content, content_type) file_tuple = (original_filename, content, content_type)
# Get JWT token using same logic as DocumentFileProcessor # Get JWT token using same logic as DocumentFileProcessor
# This will handle anonymous JWT creation if needed # This will handle anonymous JWT creation if needed

View file

@ -20,6 +20,7 @@ class FileTask:
retry_count: int = 0 retry_count: int = 0
created_at: float = field(default_factory=time.time) created_at: float = field(default_factory=time.time)
updated_at: float = field(default_factory=time.time) updated_at: float = field(default_factory=time.time)
filename: Optional[str] = None # Original filename for display
@property @property
def duration_seconds(self) -> float: def duration_seconds(self) -> float:

View file

@ -1,6 +1,5 @@
import asyncio import asyncio
import random import random
from typing import Dict, Optional
import time import time
import uuid import uuid
@ -59,6 +58,7 @@ class TaskService:
file_paths: list, file_paths: list,
langflow_file_service, langflow_file_service,
session_manager, session_manager,
original_filenames: dict | None = None,
jwt_token: str = None, jwt_token: str = None,
owner_name: str = None, owner_name: str = None,
owner_email: str = None, owner_email: str = None,
@ -66,6 +66,7 @@ class TaskService:
tweaks: dict = None, tweaks: dict = None,
settings: dict = None, settings: dict = None,
delete_after_ingest: bool = True, delete_after_ingest: bool = True,
replace_duplicates: bool = False,
) -> str: ) -> str:
"""Create a new upload task for Langflow file processing with upload and ingest""" """Create a new upload task for Langflow file processing with upload and ingest"""
# Use LangflowFileProcessor with user context # Use LangflowFileProcessor with user context
@ -82,18 +83,35 @@ class TaskService:
tweaks=tweaks, tweaks=tweaks,
settings=settings, settings=settings,
delete_after_ingest=delete_after_ingest, delete_after_ingest=delete_after_ingest,
replace_duplicates=replace_duplicates,
) )
return await self.create_custom_task(user_id, file_paths, processor) return await self.create_custom_task(user_id, file_paths, processor, original_filenames)
async def create_custom_task(self, user_id: str, items: list, processor) -> str: async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: dict | None = None) -> str:
"""Create a new task with custom processor for any type of items""" """Create a new task with custom processor for any type of items"""
import os
# Store anonymous tasks under a stable key so they can be retrieved later # Store anonymous tasks under a stable key so they can be retrieved later
store_user_id = user_id or AnonymousUser().user_id store_user_id = user_id or AnonymousUser().user_id
task_id = str(uuid.uuid4()) task_id = str(uuid.uuid4())
# Create file tasks with original filenames if provided
normalized_originals = (
{str(k): v for k, v in original_filenames.items()} if original_filenames else {}
)
file_tasks = {
str(item): FileTask(
file_path=str(item),
filename=normalized_originals.get(
str(item), os.path.basename(str(item))
),
)
for item in items
}
upload_task = UploadTask( upload_task = UploadTask(
task_id=task_id, task_id=task_id,
total_files=len(items), total_files=len(items),
file_tasks={str(item): FileTask(file_path=str(item)) for item in items}, file_tasks=file_tasks,
) )
# Attach the custom processor to the task # Attach the custom processor to the task
@ -268,6 +286,7 @@ class TaskService:
"created_at": file_task.created_at, "created_at": file_task.created_at,
"updated_at": file_task.updated_at, "updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds, "duration_seconds": file_task.duration_seconds,
"filename": file_task.filename,
} }
# Count running and pending files # Count running and pending files
@ -322,6 +341,7 @@ class TaskService:
"created_at": file_task.created_at, "created_at": file_task.created_at,
"updated_at": file_task.updated_at, "updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds, "duration_seconds": file_task.duration_seconds,
"filename": file_task.filename,
} }
if file_task.status.value == "running": if file_task.status.value == "running":

View file

@ -0,0 +1,55 @@
"""
Utility functions for constructing OpenSearch queries consistently.
"""
from typing import Union, List
def build_filename_query(filename: str) -> dict:
"""
Build a standardized query for finding documents by filename.
Args:
filename: The exact filename to search for
Returns:
A dict containing the OpenSearch query body
"""
return {
"term": {
"filename": filename
}
}
def build_filename_search_body(filename: str, size: int = 1, source: Union[bool, List[str]] = False) -> dict:
"""
Build a complete search body for checking if a filename exists.
Args:
filename: The exact filename to search for
size: Number of results to return (default: 1)
source: Whether to include source fields, or list of specific fields to include (default: False)
Returns:
A dict containing the complete OpenSearch search body
"""
return {
"query": build_filename_query(filename),
"size": size,
"_source": source
}
def build_filename_delete_body(filename: str) -> dict:
"""
Build a delete-by-query body for removing all documents with a filename.
Args:
filename: The exact filename to delete
Returns:
A dict containing the OpenSearch delete-by-query body
"""
return {
"query": build_filename_query(filename)
}