Merge branch 'main' into feat/knowledge-page-sweep

This commit is contained in:
Cole Goldsmith 2025-10-06 13:22:50 -05:00
commit e4ae80860a
19 changed files with 1990 additions and 1369 deletions

View file

@ -0,0 +1,66 @@
"use client";
import { RotateCcw } from "lucide-react";
import type React from "react";
import { Button } from "./ui/button";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "./ui/dialog";
/**
 * Props for {@link DuplicateHandlingDialog}, the confirmation dialog shown
 * before an existing document is overwritten.
 */
interface DuplicateHandlingDialogProps {
/** Controlled open state of the dialog. */
open: boolean;
/**
 * Invoked when the dialog's open state should change. The component calls it
 * with `false` when Cancel is clicked and after `onOverwrite` has resolved.
 */
onOpenChange: (open: boolean) => void;
/**
 * Invoked when the user confirms with the Overwrite button. May be
 * synchronous or return a promise; the component awaits it before closing.
 */
onOverwrite: () => void | Promise<void>;
/** When true, both footer buttons are disabled. Defaults to `false`. */
isLoading?: boolean;
}
/**
 * Confirmation dialog asking the user whether an existing document should be
 * overwritten.
 *
 * - Cancel calls `onOpenChange(false)`.
 * - Overwrite awaits `onOverwrite()` and then calls `onOpenChange(false)`.
 * - While `isLoading` is true both buttons are disabled, and close requests
 *   coming from the dialog itself (Escape key, outside click, close control)
 *   are ignored so the dialog cannot be dismissed while the Cancel button is
 *   disabled.
 *
 * @param open - Controlled open state.
 * @param onOpenChange - Receives the requested open state.
 * @param onOverwrite - Confirm action; may return a promise.
 * @param isLoading - Disables interaction while true (default `false`).
 */
export const DuplicateHandlingDialog: React.FC<
  DuplicateHandlingDialogProps
> = ({ open, onOpenChange, onOverwrite, isLoading = false }) => {
  const handleOverwrite = async () => {
    // If onOverwrite throws or rejects, the close call below is skipped and
    // the dialog stays open.
    await onOverwrite();
    // Deliberately bypasses the loading guard: the parent's isLoading flag may
    // still be true at this point, and the dialog must close after success.
    onOpenChange(false);
  };

  const handleDialogOpenChange = (nextOpen: boolean) => {
    // Keep dialog-originated dismissals consistent with the disabled Cancel
    // button: no closing while an overwrite is in flight.
    if (isLoading && !nextOpen) {
      return;
    }
    onOpenChange(nextOpen);
  };

  return (
    <Dialog open={open} onOpenChange={handleDialogOpenChange}>
      <DialogContent className="sm:max-w-[450px]">
        <DialogHeader>
          <DialogTitle>Overwrite document</DialogTitle>
          <DialogDescription className="pt-2 text-muted-foreground">
            Overwriting will replace the existing document with another version.
            This can't be undone.
          </DialogDescription>
        </DialogHeader>
        <DialogFooter className="flex-row gap-2 justify-end">
          <Button
            type="button"
            variant="ghost"
            onClick={() => onOpenChange(false)}
            disabled={isLoading}
            size="sm"
          >
            Cancel
          </Button>
          <Button
            type="button"
            variant="default"
            size="sm"
            onClick={handleOverwrite}
            disabled={isLoading}
            className="flex items-center gap-2 !bg-accent-amber-foreground hover:!bg-foreground text-primary-foreground"
          >
            <RotateCcw className="h-3.5 w-3.5" />
            Overwrite
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};

File diff suppressed because it is too large Load diff

View file

@ -9,7 +9,7 @@ export default function IBMLogo(props: React.SVGProps<SVGSVGElement>) {
{...props} {...props}
> >
<title>IBM watsonx.ai Logo</title> <title>IBM watsonx.ai Logo</title>
<g clip-path="url(#clip0_2620_2081)"> <g clipPath="url(#clip0_2620_2081)">
<path <path
d="M13 12.0007C12.4477 12.0007 12 12.4484 12 13.0007C12 13.0389 12.0071 13.0751 12.0112 13.1122C10.8708 14.0103 9.47165 14.5007 8 14.5007C5.86915 14.5007 4 12.5146 4 10.2507C4 7.90722 5.9065 6.00072 8.25 6.00072H8.5V5.00072H8.25C5.3552 5.00072 3 7.35592 3 10.2507C3 11.1927 3.2652 12.0955 3.71855 12.879C2.3619 11.6868 1.5 9.94447 1.5 8.00072C1.5 6.94312 1.74585 5.93432 2.23095 5.00292L1.34375 4.54102C0.79175 5.60157 0.5 6.79787 0.5 8.00072C0.5 12.1362 3.8645 15.5007 8 15.5007C9.6872 15.5007 11.2909 14.9411 12.6024 13.9176C12.7244 13.9706 12.8586 14.0007 13 14.0007C13.5523 14.0007 14 13.553 14 13.0007C14 12.4484 13.5523 12.0007 13 12.0007Z" d="M13 12.0007C12.4477 12.0007 12 12.4484 12 13.0007C12 13.0389 12.0071 13.0751 12.0112 13.1122C10.8708 14.0103 9.47165 14.5007 8 14.5007C5.86915 14.5007 4 12.5146 4 10.2507C4 7.90722 5.9065 6.00072 8.25 6.00072H8.5V5.00072H8.25C5.3552 5.00072 3 7.35592 3 10.2507C3 11.1927 3.2652 12.0955 3.71855 12.879C2.3619 11.6868 1.5 9.94447 1.5 8.00072C1.5 6.94312 1.74585 5.93432 2.23095 5.00292L1.34375 4.54102C0.79175 5.60157 0.5 6.79787 0.5 8.00072C0.5 12.1362 3.8645 15.5007 8 15.5007C9.6872 15.5007 11.2909 14.9411 12.6024 13.9176C12.7244 13.9706 12.8586 14.0007 13 14.0007C13.5523 14.0007 14 13.553 14 13.0007C14 12.4484 13.5523 12.0007 13 12.0007Z"
fill="currentColor" fill="currentColor"

View file

@ -44,7 +44,7 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
placeholder={placeholder} placeholder={placeholder}
className={cn( className={cn(
"primary-input", "primary-input",
icon && "pl-9", icon && "!pl-9",
type === "password" && "!pr-8", type === "password" && "!pr-8",
icon ? inputClassName : className icon ? inputClassName : className
)} )}

View file

@ -0,0 +1,47 @@
import {
type UseMutationOptions,
useMutation,
useQueryClient,
} from "@tanstack/react-query";
/** Variables passed to the cancel-task mutation. */
export interface CancelTaskRequest {
/** Task identifier; interpolated into the `/api/tasks/{taskId}/cancel` path. */
taskId: string;
}
/**
 * Shape the JSON body of a successful cancel response is typed as.
 * The body is not validated at runtime.
 */
export interface CancelTaskResponse {
/** Status string reported by the server (possible values not visible here — TODO confirm). */
status: string;
/** Presumably the id of the task the request targeted — confirm against the backend. */
task_id: string;
}
/**
 * React Query mutation hook that cancels a task by POSTing to
 * `/api/tasks/{taskId}/cancel`.
 *
 * On success the `["tasks"]` query is always invalidated so task lists
 * refetch, and any `onSuccess` supplied through `options` is then invoked with
 * the same arguments. (Previously a caller-supplied `onSuccess` replaced the
 * invalidation entirely because `options` was spread last.)
 *
 * @param options - Additional mutation options; `mutationFn` cannot be
 *   overridden.
 * @returns The mutation result object from `useMutation`.
 * @throws The mutation rejects with an `Error` whose message is the `error`
 *   field of the response body when present, otherwise
 *   `"Failed to cancel task"`.
 */
export const useCancelTaskMutation = (
  options?: Omit<
    UseMutationOptions<CancelTaskResponse, Error, CancelTaskRequest>,
    "mutationFn"
  >
) => {
  const queryClient = useQueryClient();

  async function cancelTask(
    variables: CancelTaskRequest,
  ): Promise<CancelTaskResponse> {
    // Encode the id so characters such as "/", "?" or "#" cannot alter the
    // request path.
    const response = await fetch(
      `/api/tasks/${encodeURIComponent(variables.taskId)}/cancel`,
      {
        method: "POST",
      },
    );

    if (!response.ok) {
      // The error body may be missing or not JSON; fall back to an empty
      // object so the generic message is used.
      const errorData: { error?: string } = await response
        .json()
        .catch(() => ({}));
      throw new Error(errorData.error || "Failed to cancel task");
    }

    return response.json();
  }

  return useMutation({
    ...options,
    mutationFn: cancelTask,
    onSuccess: (...args) => {
      // Invalidate tasks query to refresh the list. Not awaited, so the
      // mutation does not stay pending on the refetch.
      void queryClient.invalidateQueries({ queryKey: ["tasks"] });
      // Then run the caller's handler, if any, with the original arguments.
      return options?.onSuccess?.(...args);
    },
  });
};

View file

@ -0,0 +1,79 @@
import {
type UseQueryOptions,
useQuery,
useQueryClient,
} from "@tanstack/react-query";
/**
 * A single task entry as typed from the `/api/tasks` response.
 * Field meanings below are inferred from names unless stated otherwise; the
 * payload is not validated at runtime.
 */
export interface Task {
/** Task identifier. */
task_id: string;
/**
 * Current task state. `useGetTasksQuery` treats "pending", "running" and
 * "processing" as active and keeps polling while any task is in one of them.
 * NOTE(review): there is no "cancelled"-style member — confirm which status
 * the backend reports for a cancelled task.
 */
status:
| "pending"
| "running"
| "processing"
| "completed"
| "failed"
| "error";
// Per-file counters; presumably counts of files in each state — TODO confirm.
total_files?: number;
processed_files?: number;
successful_files?: number;
failed_files?: number;
running_files?: number;
pending_files?: number;
// Timestamps as strings (format not visible here — TODO confirm).
created_at: string;
updated_at: string;
duration_seconds?: number;
/** Free-form result payload. */
result?: Record<string, unknown>;
/** Error description, when present. */
error?: string;
/** Per-file details keyed by string (presumably file path or name — confirm). */
files?: Record<string, Record<string, unknown>>;
}
/** Shape the JSON body of `GET /api/tasks` is typed as. */
export interface TasksResponse {
/** Task list; `useGetTasksQuery` substitutes an empty array when this is falsy. */
tasks: Task[];
}
/**
 * React Query hook that loads the task list from `/api/tasks` under the
 * `["tasks"]` query key.
 *
 * Polling: the query refetches every 3 seconds (also in the background) for
 * as long as at least one cached task is "pending", "running" or
 * "processing"; with no cached tasks, or none in those states, polling stops.
 *
 * @param options - Extra query options; spread last, so they override the
 *   defaults set here (`queryKey` and `queryFn` cannot be supplied).
 * @returns The query result object from `useQuery`.
 */
export const useGetTasksQuery = (
  options?: Omit<UseQueryOptions<Task[]>, "queryKey" | "queryFn">
) => {
  const queryClient = useQueryClient();

  // Fetches the task list; a missing `tasks` field yields an empty array.
  const fetchTaskList = async (): Promise<Task[]> => {
    const res = await fetch("/api/tasks");
    if (res.ok) {
      const payload: TasksResponse = await res.json();
      return payload.tasks || [];
    }
    throw new Error("Failed to fetch tasks");
  };

  return useQuery(
    {
      queryKey: ["tasks"],
      queryFn: fetchTaskList,
      refetchInterval: (query) => {
        const cachedTasks = query.state.data;
        // Nothing cached or an empty list: no reason to poll.
        if (!cachedTasks?.length) {
          return false;
        }

        // Statuses that mean work is still in flight.
        const inFlightStatuses = ["pending", "running", "processing"];
        const anyInFlight = cachedTasks.some(({ status }: Task) =>
          inFlightStatuses.includes(status),
        );
        if (anyInFlight) {
          return 3000; // Poll every 3 seconds while tasks are active
        }
        return false;
      },
      refetchIntervalInBackground: true,
      staleTime: 0, // Data is treated as stale immediately
      gcTime: 5 * 60 * 1000, // Cached for 5 minutes
      ...options,
    },
    queryClient,
  );
};

View file

@ -1,26 +1,32 @@
"use client"; "use client";
import { ArrowLeft, Check, Copy, Loader2, Search, X } from "lucide-react"; import { ArrowLeft, Check, Copy, Loader2, Search, X } from "lucide-react";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation"; import { useRouter, useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
// import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { ProtectedRoute } from "@/components/protected-route"; import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
import { import {
type ChunkResult, type ChunkResult,
type File, type File,
useGetSearchQuery, useGetSearchQuery,
} from "../../api/queries/useGetSearchQuery"; } from "../../api/queries/useGetSearchQuery";
// import { Label } from "@/components/ui/label"; // import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox"; // import { Checkbox } from "@/components/ui/checkbox";
import { KnowledgeSearchInput } from "@/components/knowledge-search-input"; import { KnowledgeSearchInput } from "@/components/knowledge-search-input";
const getFileTypeLabel = (mimetype: string) => { const getFileTypeLabel = (mimetype: string) => {
if (mimetype === "application/pdf") return "PDF"; if (mimetype === "application/pdf") return "PDF";
if (mimetype === "text/plain") return "Text"; if (mimetype === "text/plain") return "Text";
if (mimetype === "application/msword") return "Word Document"; if (mimetype === "application/msword") return "Word Document";
return "Unknown"; return "Unknown";
}; };
function ChunksPageContent() { function ChunksPageContent() {
@ -37,13 +43,13 @@ function ChunksPageContent() {
number | null number | null
>(null); >(null);
// Calculate average chunk length // Calculate average chunk length
const averageChunkLength = useMemo( const averageChunkLength = useMemo(
() => () =>
chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
chunks.length || 0, chunks.length || 0,
[chunks] [chunks],
); );
// const [selectAll, setSelectAll] = useState(false); // const [selectAll, setSelectAll] = useState(false);
@ -53,70 +59,70 @@ function ChunksPageContent() {
parsedFilterData parsedFilterData
); );
const handleCopy = useCallback((text: string, index: number) => { const handleCopy = useCallback((text: string, index: number) => {
// Trim whitespace and remove new lines/tabs for cleaner copy // Trim whitespace and remove new lines/tabs for cleaner copy
navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, "")); navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, ""));
setActiveCopiedChunkIndex(index); setActiveCopiedChunkIndex(index);
setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds
}, []); }, []);
const fileData = (data as File[]).find( const fileData = (data as File[]).find(
(file: File) => file.filename === filename (file: File) => file.filename === filename,
); );
// Extract chunks for the specific file // Extract chunks for the specific file
useEffect(() => { useEffect(() => {
if (!filename || !(data as File[]).length) { if (!filename || !(data as File[]).length) {
setChunks([]); setChunks([]);
return; return;
} }
setChunks( setChunks(
fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || [] fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || [],
); );
}, [data, filename]); }, [data, filename]);
// Set selected state for all checkboxes when selectAll changes // Set selected state for all checkboxes when selectAll changes
// useEffect(() => { useEffect(() => {
// if (selectAll) { if (selectAll) {
// setSelectedChunks(new Set(chunks.map((_, index) => index))); setSelectedChunks(new Set(chunks.map((_, index) => index)));
// } else { } else {
// setSelectedChunks(new Set()); setSelectedChunks(new Set());
// } }
// }, [selectAll, setSelectedChunks, chunks]); }, [selectAll, setSelectedChunks, chunks]);
const handleBack = useCallback(() => { const handleBack = useCallback(() => {
router.push("/knowledge"); router.push("/knowledge");
}, [router]); }, [router]);
// const handleChunkCardCheckboxChange = useCallback( // const handleChunkCardCheckboxChange = useCallback(
// (index: number) => { // (index: number) => {
// setSelectedChunks((prevSelected) => { // setSelectedChunks((prevSelected) => {
// const newSelected = new Set(prevSelected); // const newSelected = new Set(prevSelected);
// if (newSelected.has(index)) { // if (newSelected.has(index)) {
// newSelected.delete(index); // newSelected.delete(index);
// } else { // } else {
// newSelected.add(index); // newSelected.add(index);
// } // }
// return newSelected; // return newSelected;
// }); // });
// }, // },
// [setSelectedChunks] // [setSelectedChunks]
// ); // );
if (!filename) { if (!filename) {
return ( return (
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<Search className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50" /> <Search className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50" />
<p className="text-lg text-muted-foreground">No file specified</p> <p className="text-lg text-muted-foreground">No file specified</p>
<p className="text-sm text-muted-foreground/70 mt-2"> <p className="text-sm text-muted-foreground/70 mt-2">
Please select a file from the knowledge page Please select a file from the knowledge page
</p> </p>
</div> </div>
</div> </div>
); );
} }
return ( return (
<div className="flex flex-col h-full"> <div className="flex flex-col h-full">
@ -154,8 +160,8 @@ function ChunksPageContent() {
Select all Select all
</Label> </Label>
</div> */} </div> */}
</div> </div>
</div> </div>
{/* Content Area - matches knowledge page structure */} {/* Content Area - matches knowledge page structure */}
<div className="flex-1 overflow-auto pr-6"> <div className="flex-1 overflow-auto pr-6">
@ -194,73 +200,73 @@ function ChunksPageContent() {
} }
/> />
</div> */} </div> */}
<span className="text-sm font-bold"> <span className="text-sm font-bold">
Chunk {chunk.index} Chunk {chunk.index}
</span> </span>
<span className="bg-background p-1 rounded text-xs text-muted-foreground/70"> <span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
{chunk.text.length} chars {chunk.text.length} chars
</span> </span>
<div className="py-1"> <div className="py-1">
<Button <Button
onClick={() => handleCopy(chunk.text, index)} onClick={() => handleCopy(chunk.text, index)}
variant="ghost" variant="ghost"
size="sm" size="sm"
> >
{activeCopiedChunkIndex === index ? ( {activeCopiedChunkIndex === index ? (
<Check className="text-muted-foreground" /> <Check className="text-muted-foreground" />
) : ( ) : (
<Copy className="text-muted-foreground" /> <Copy className="text-muted-foreground" />
)} )}
</Button> </Button>
</div> </div>
</div> </div>
<span className="bg-background p-1 rounded text-xs text-muted-foreground/70"> <span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
{chunk.score.toFixed(2)} score {chunk.score.toFixed(2)} score
</span> </span>
{/* TODO: Update to use active toggle */} {/* TODO: Update to use active toggle */}
{/* <span className="px-2 py-1 text-green-500"> {/* <span className="px-2 py-1 text-green-500">
<Switch <Switch
className="ml-2 bg-green-500" className="ml-2 bg-green-500"
checked={true} checked={true}
/> />
Active Active
</span> */} </span> */}
</div> </div>
<blockquote className="text-sm text-muted-foreground leading-relaxed ml-1.5"> <blockquote className="text-sm text-muted-foreground leading-relaxed ml-1.5">
{chunk.text} {chunk.text}
</blockquote> </blockquote>
</div> </div>
))} ))}
</div> </div>
)} )}
</div> </div>
</div> </div>
{/* Right panel - Summary (TODO), Technical details, */} {/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && ( {chunks.length > 0 && (
<div className="w-[320px] py-20 px-2"> <div className="w-[320px] py-20 px-2">
<div className="mb-8"> <div className="mb-8">
<h2 className="text-xl font-semibold mt-3 mb-4"> <h2 className="text-xl font-semibold mt-3 mb-4">
Technical details Technical details
</h2> </h2>
<dl> <dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground"> <dt className="text-sm/6 text-muted-foreground">
Total chunks Total chunks
</dt> </dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length} {chunks.length}
</dd> </dd>
</div> </div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Avg length</dt> <dt className="text-sm/6 text-muted-foreground">Avg length</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars {averageChunkLength.toFixed(0)} chars
</dd> </dd>
</div> </div>
{/* TODO: Uncomment after data is available */} {/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt> <dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd> </dd>
@ -270,79 +276,79 @@ function ChunksPageContent() {
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd> </dd>
</div> */} </div> */}
</dl> </dl>
</div> </div>
<div className="mb-8"> <div className="mb-8">
<h2 className="text-xl font-semibold mt-2 mb-3"> <h2 className="text-xl font-semibold mt-2 mb-3">
Original document Original document
</h2> </h2>
<dl> <dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt> <dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename} {fileData?.filename}
</dd> </dd>
</div> */} </div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt> <dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"} {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd> </dd>
</div> </div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt> <dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size {fileData?.size
? `${Math.round(fileData.size / 1024)} KB` ? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"} : "Unknown"}
</dd> </dd>
</div> </div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt> <dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A N/A
</dd> </dd>
</div> */} </div> */}
{/* TODO: Uncomment after data is available */} {/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt> <dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */} </div> */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt> <dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A N/A
</dd> </dd>
</div> */} </div> */}
</dl> </dl>
</div> </div>
</div> </div>
)} )}
</div> </div>
); );
} }
function ChunksPage() { function ChunksPage() {
return ( return (
<Suspense <Suspense
fallback={ fallback={
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" /> <Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" />
<p className="text-lg text-muted-foreground">Loading...</p> <p className="text-lg text-muted-foreground">Loading...</p>
</div> </div>
</div> </div>
} }
> >
<ChunksPageContent /> <ChunksPageContent />
</Suspense> </Suspense>
); );
} }
export default function ProtectedChunksPage() { export default function ProtectedChunksPage() {
return ( return (
<ProtectedRoute> <ProtectedRoute>
<ChunksPage /> <ChunksPage />
</ProtectedRoute> </ProtectedRoute>
); );
} }

View file

@ -1,247 +1,346 @@
"use client"; "use client";
import { themeQuartz, type ColDef } from "ag-grid-community"; import type { ColDef, GetRowIdParams } from "ag-grid-community";
import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react"; import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
import { Cloud, FileIcon } from "lucide-react"; import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react";
import { useRouter } from "next/navigation"; import { useRouter } from "next/navigation";
import { import {
useCallback, type ChangeEvent,
useRef, useCallback,
useState, useEffect,
useRef,
useState,
} from "react"; } from "react";
import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb";
import { KnowledgeDropdown } from "@/components/knowledge-dropdown"; import { KnowledgeDropdown } from "@/components/knowledge-dropdown";
import { ProtectedRoute } from "@/components/protected-route"; import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useLayout } from "@/contexts/layout-context";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery"; import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery";
import "@/components/AgGrid/registerAgGridModules"; import "@/components/AgGrid/registerAgGridModules";
import "@/components/AgGrid/agGridStyles.css"; import "@/components/AgGrid/agGridStyles.css";
import { toast } from "sonner"; import { toast } from "sonner";
import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown"; import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { StatusBadge } from "@/components/ui/status-badge"; import { StatusBadge } from "@/components/ui/status-badge";
import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog"; import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog";
import { useDeleteDocument } from "../api/mutations/useDeleteDocument"; import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
import GoogleDriveIcon from "../settings/icons/google-drive-icon";
import OneDriveIcon from "../settings/icons/one-drive-icon";
import SharePointIcon from "../settings/icons/share-point-icon";
import { KnowledgeSearchInput } from "@/components/knowledge-search-input";
// Function to get the appropriate icon for a connector type // Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) { function getSourceIcon(connectorType?: string) {
switch (connectorType) { switch (connectorType) {
case "google_drive": case "google_drive":
return ( return (
<GoogleDriveIcon className="h-4 w-4 text-foreground flex-shrink-0" /> <SiGoogledrive className="h-4 w-4 text-foreground flex-shrink-0" />
); );
case "onedrive": case "onedrive":
return <OneDriveIcon className="h-4 w-4 text-foreground flex-shrink-0" />; return (
case "sharepoint": <TbBrandOnedrive className="h-4 w-4 text-foreground flex-shrink-0" />
return ( );
<SharePointIcon className="h-4 w-4 text-foreground flex-shrink-0" /> case "sharepoint":
); return <Building2 className="h-4 w-4 text-foreground flex-shrink-0" />;
case "s3": case "s3":
return <Cloud className="h-4 w-4 text-foreground flex-shrink-0" />; return <Cloud className="h-4 w-4 text-foreground flex-shrink-0" />;
default: default:
return ( return (
<FileIcon className="h-4 w-4 text-muted-foreground flex-shrink-0" /> <HardDrive className="h-4 w-4 text-muted-foreground flex-shrink-0" />
); );
} }
} }
function SearchPage() { function SearchPage() {
const router = useRouter(); const router = useRouter();
const { files: taskFiles } = useTask(); const { isMenuOpen, files: taskFiles, refreshTasks } = useTask();
const { const { totalTopOffset } = useLayout();
parsedFilterData, const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
queryOverride, useKnowledgeFilter();
} = useKnowledgeFilter(); const [selectedRows, setSelectedRows] = useState<File[]>([]);
const [selectedRows, setSelectedRows] = useState<File[]>([]); const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false);
const [showBulkDeleteDialog, setShowBulkDeleteDialog] = useState(false);
const deleteDocumentMutation = useDeleteDocument(); const deleteDocumentMutation = useDeleteDocument();
const { data = [], isFetching } = useGetSearchQuery( useEffect(() => {
queryOverride, refreshTasks();
parsedFilterData }, [refreshTasks]);
);
// Convert TaskFiles to File format and merge with backend results const { data: searchData = [], isFetching } = useGetSearchQuery(
const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { parsedFilterData?.query || "*",
return { parsedFilterData,
filename: taskFile.filename, );
mimetype: taskFile.mimetype, // Convert TaskFiles to File format and merge with backend results
source_url: taskFile.source_url, const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
size: taskFile.size, return {
connector_type: taskFile.connector_type, filename: taskFile.filename,
status: taskFile.status, mimetype: taskFile.mimetype,
}; source_url: taskFile.source_url,
}); size: taskFile.size,
connector_type: taskFile.connector_type,
status: taskFile.status,
};
});
const backendFiles = data as File[]; // Create a map of task files by filename for quick lookup
const taskFileMap = new Map(
taskFilesAsFiles.map((file) => [file.filename, file]),
);
const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { // Override backend files with task file status if they exist
return ( const backendFiles = (searchData as File[])
taskFile.status !== "active" && .map((file) => {
!backendFiles.some( const taskFile = taskFileMap.get(file.filename);
(backendFile) => backendFile.filename === taskFile.filename if (taskFile) {
) // Override backend file with task file data (includes status)
); return { ...file, ...taskFile };
}); }
return file;
})
.filter((file) => {
// Only filter out files that are currently processing AND in taskFiles
const taskFile = taskFileMap.get(file.filename);
return !taskFile || taskFile.status !== "processing";
});
// Combine task files first, then backend files const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
const fileResults = [...backendFiles, ...filteredTaskFiles]; return (
taskFile.status !== "active" &&
!backendFiles.some(
(backendFile) => backendFile.filename === taskFile.filename,
)
);
});
const gridRef = useRef<AgGridReact>(null); // Combine task files first, then backend files
const fileResults = [...backendFiles, ...filteredTaskFiles];
const [columnDefs] = useState<ColDef<File>[]>([ const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
{ gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
field: "filename", };
headerName: "Source",
checkboxSelection: true,
headerCheckboxSelection: true,
initialFlex: 2,
minWidth: 220,
cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => {
return (
<button
type="button"
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left w-full"
onClick={() => {
router.push(
`/knowledge/chunks?filename=${encodeURIComponent(
data?.filename ?? ""
)}`
);
}}
>
{getSourceIcon(data?.connector_type)}
<span className="font-medium text-foreground truncate">
{value}
</span>
</button>
);
},
},
{
field: "size",
headerName: "Size",
valueFormatter: (params) =>
params.value ? `${Math.round(params.value / 1024)} KB` : "-",
},
{
field: "mimetype",
headerName: "Type",
},
{
field: "owner",
headerName: "Owner",
valueFormatter: (params) =>
params.data?.owner_name || params.data?.owner_email || "—",
},
{
field: "chunkCount",
headerName: "Chunks",
valueFormatter: (params) => params.data?.chunkCount?.toString() || "-",
},
{
field: "avgScore",
headerName: "Avg score",
cellRenderer: ({ value }: CustomCellRendererProps<File>) => {
return (
<span className="text-xs text-accent-emerald-foreground bg-accent-emerald px-2 py-1 rounded">
{value?.toFixed(2) ?? "-"}
</span>
);
},
},
{
field: "status",
headerName: "Status",
cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
// Default to 'active' status if no status is provided
const status = data?.status || "active";
return <StatusBadge status={status} />;
},
},
{
cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
return <KnowledgeActionsDropdown filename={data?.filename || ""} />;
},
cellStyle: {
alignItems: "center",
display: "flex",
justifyContent: "center",
padding: 0,
},
colId: "actions",
filter: false,
minWidth: 0,
width: 40,
resizable: false,
sortable: false,
initialFlex: 0,
},
]);
const defaultColDef: ColDef<File> = { const gridRef = useRef<AgGridReact>(null);
resizable: false,
suppressMovable: true,
initialFlex: 1,
minWidth: 100,
};
const onSelectionChanged = useCallback(() => { const columnDefs = [
if (gridRef.current) { {
const selectedNodes = gridRef.current.api.getSelectedRows(); field: "filename",
setSelectedRows(selectedNodes); headerName: "Source",
} checkboxSelection: (params: CustomCellRendererProps<File>) =>
}, []); (params?.data?.status || "active") === "active",
headerCheckboxSelection: true,
initialFlex: 2,
minWidth: 220,
cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => {
// Read status directly from data on each render
const status = data?.status || "active";
const isActive = status === "active";
console.log(data?.filename, status, "a");
return (
<div className="flex items-center overflow-hidden w-full">
<div
className={`transition-opacity duration-200 ${isActive ? "w-0" : "w-7"}`}
></div>
<button
type="button"
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left flex-1 overflow-hidden"
onClick={() => {
if (!isActive) {
return;
}
router.push(
`/knowledge/chunks?filename=${encodeURIComponent(
data?.filename ?? "",
)}`,
);
}}
>
{getSourceIcon(data?.connector_type)}
<span className="font-medium text-foreground truncate">
{value}
</span>
</button>
</div>
);
},
},
{
field: "size",
headerName: "Size",
valueFormatter: (params: CustomCellRendererProps<File>) =>
params.value ? `${Math.round(params.value / 1024)} KB` : "-",
},
{
field: "mimetype",
headerName: "Type",
},
{
field: "owner",
headerName: "Owner",
valueFormatter: (params: CustomCellRendererProps<File>) =>
params.data?.owner_name || params.data?.owner_email || "—",
},
{
field: "chunkCount",
headerName: "Chunks",
valueFormatter: (params: CustomCellRendererProps<File>) => params.data?.chunkCount?.toString() || "-",
},
{
field: "avgScore",
headerName: "Avg score",
initialFlex: 0.5,
cellRenderer: ({ value }: CustomCellRendererProps<File>) => {
return (
<span className="text-xs text-accent-emerald-foreground bg-accent-emerald px-2 py-1 rounded">
{value?.toFixed(2) ?? "-"}
</span>
);
},
},
{
  // Status column: renders the row's status as a badge.
  field: "status",
  headerName: "Status",
  cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
    // Default to 'active' status if no status is provided
    const status = data?.status || "active";
    return <StatusBadge status={status} />;
  },
},
{
  // Trailing per-row actions column: fixed 40px, no sort/filter/resize.
  colId: "actions",
  width: 40,
  minWidth: 0,
  initialFlex: 0,
  filter: false,
  sortable: false,
  resizable: false,
  cellStyle: {
    display: "flex",
    alignItems: "center",
    justifyContent: "center",
    padding: 0,
  },
  // The dropdown is rendered only for rows whose status is (or defaults to)
  // "active"; every other status renders an empty cell.
  cellRenderer: ({ data }: CustomCellRendererProps<File>) =>
    (data?.status || "active") === "active" ? (
      <KnowledgeActionsDropdown filename={data?.filename || ""} />
    ) : null,
},
];
const handleBulkDelete = async () => { const defaultColDef: ColDef<File> = {
if (selectedRows.length === 0) return; resizable: false,
suppressMovable: true,
initialFlex: 1,
minWidth: 100,
};
try { const onSelectionChanged = useCallback(() => {
// Delete each file individually since the API expects one filename at a time if (gridRef.current) {
const deletePromises = selectedRows.map((row) => const selectedNodes = gridRef.current.api.getSelectedRows();
deleteDocumentMutation.mutateAsync({ filename: row.filename }) setSelectedRows(selectedNodes);
); }
}, []);
await Promise.all(deletePromises); const handleBulkDelete = async () => {
if (selectedRows.length === 0) return;
toast.success( try {
`Successfully deleted ${selectedRows.length} document${ // Delete each file individually since the API expects one filename at a time
selectedRows.length > 1 ? "s" : "" const deletePromises = selectedRows.map((row) =>
}` deleteDocumentMutation.mutateAsync({ filename: row.filename }),
); );
setSelectedRows([]);
setShowBulkDeleteDialog(false);
// Clear selection in the grid await Promise.all(deletePromises);
if (gridRef.current) {
gridRef.current.api.deselectAll(); toast.success(
} `Successfully deleted ${selectedRows.length} document${
} catch (error) { selectedRows.length > 1 ? "s" : ""
toast.error( }`,
error instanceof Error );
? error.message setSelectedRows([]);
: "Failed to delete some documents" setShowBulkDeleteDialog(false);
);
} // Clear selection in the grid
}; if (gridRef.current) {
gridRef.current.api.deselectAll();
}
} catch (error) {
toast.error(
error instanceof Error
? error.message
: "Failed to delete some documents",
);
}
};
return ( return (
<> <div
<div className="flex flex-col h-full"> className={`fixed inset-0 md:left-72 flex flex-col transition-all duration-300 ${
isMenuOpen && isPanelOpen
? "md:right-[704px]"
: // Both open: 384px (menu) + 320px (KF panel)
isMenuOpen
? "md:right-96"
: // Only menu open: 384px
isPanelOpen
? "md:right-80"
: // Only KF panel open: 320px
"md:right-6" // Neither open: 24px
}`}
style={{ top: `${totalTopOffset}px` }}
>
<div className="flex-1 flex flex-col min-h-0 px-6 py-6">
<div className="flex items-center justify-between mb-6"> <div className="flex items-center justify-between mb-6">
<h2 className="text-lg font-semibold">Knowledge</h2> <h2 className="text-lg font-semibold">Project Knowledge</h2>
<KnowledgeDropdown variant="button" />
</div> </div>
{/* Search Input Area */} {/* Search Input Area */}
<div className="flex-1 flex flex-shrink-0 flex-wrap-reverse gap-3 mb-6"> <div className="flex-shrink-0 mb-6 xl:max-w-[75%]">
<KnowledgeSearchInput /> <form className="flex gap-3">
{/* //TODO: Implement sync button */} <div className="primary-input min-h-10 !flex items-center flex-nowrap focus-within:border-foreground transition-colors !p-[0.3rem]">
{/* <Button {selectedFilter?.name && (
<div
className={`flex items-center gap-1 h-full px-1.5 py-0.5 mr-1 rounded max-w-[25%] ${
filterAccentClasses[parsedFilterData?.color || "zinc"]
}`}
>
<span className="truncate">{selectedFilter?.name}</span>
<X
aria-label="Remove filter"
className="h-4 w-4 flex-shrink-0 cursor-pointer"
onClick={() => setSelectedFilter(null)}
/>
</div>
)}
<Search
className="h-4 w-4 ml-1 flex-shrink-0 text-placeholder-foreground"
/>
<input
className="bg-transparent w-full h-full ml-2 focus:outline-none focus-visible:outline-none font-mono placeholder:font-mono"
name="search-query"
id="search-query"
type="text"
placeholder="Enter your search query..."
onChange={handleTableSearch}
/>
</div>
{/* <Button
type="submit"
variant="outline"
className="rounded-lg p-0 flex-shrink-0"
>
{isFetching ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<Search className="h-4 w-4" />
)}
</Button> */}
{/* //TODO: Implement sync button */}
{/* <Button
type="button" type="button"
variant="outline" variant="outline"
className="rounded-lg flex-shrink-0" className="rounded-lg flex-shrink-0"
@ -249,69 +348,69 @@ function SearchPage() {
> >
Sync Sync
</Button> */} </Button> */}
{selectedRows.length > 0 && ( {selectedRows.length > 0 && (
<Button <Button
type="button" type="button"
variant="destructive" variant="destructive"
className="rounded-lg flex-shrink-0" className="rounded-lg flex-shrink-0"
onClick={() => setShowBulkDeleteDialog(true)} onClick={() => setShowBulkDeleteDialog(true)}
> >
Delete <Trash2 className="h-4 w-4" /> Delete
</Button> </Button>
)} )}
<div className="ml-auto"> </form>
<KnowledgeDropdown /> </div>
</div> <AgGridReact
</div> className="w-full overflow-auto"
<AgGridReact columnDefs={columnDefs as ColDef<File>[]}
className="w-full overflow-auto" defaultColDef={defaultColDef}
columnDefs={columnDefs} loading={isFetching}
defaultColDef={defaultColDef} ref={gridRef}
loading={isFetching} rowData={fileResults}
ref={gridRef} rowSelection="multiple"
rowData={fileResults} rowMultiSelectWithClick={false}
rowSelection="multiple" suppressRowClickSelection={true}
rowMultiSelectWithClick={false} getRowId={(params: GetRowIdParams<File>) => params.data?.filename}
suppressRowClickSelection={true} domLayout="normal"
getRowId={(params) => params.data.filename} onSelectionChanged={onSelectionChanged}
domLayout="normal" noRowsOverlayComponent={() => (
theme={themeQuartz.withParams({ browserColorScheme: "inherit" })} <div className="text-center pb-[45px]">
onSelectionChanged={onSelectionChanged} <div className="text-lg text-primary font-semibold">
noRowsOverlayComponent={() => ( No knowledge
<div className="text-center pb-[45px]"> </div>
<div className="text-lg text-primary font-semibold"> <div className="text-sm mt-1 text-muted-foreground">
No knowledge Add files from local or your preferred cloud.
</div> </div>
<div className="text-sm mt-1 text-muted-foreground"> </div>
Add files from local or your preferred cloud. )}
</div> />
</div> </div>
)}
/>
</div>
{/* Bulk Delete Confirmation Dialog */} {/* Bulk Delete Confirmation Dialog */}
<DeleteConfirmationDialog <DeleteConfirmationDialog
open={showBulkDeleteDialog} open={showBulkDeleteDialog}
onOpenChange={setShowBulkDeleteDialog} onOpenChange={setShowBulkDeleteDialog}
title="Delete Documents" title="Delete Documents"
description={`Are you sure you want to delete ${ description={`Are you sure you want to delete ${
selectedRows.length selectedRows.length
} document${ } document${
selectedRows.length > 1 ? "s" : "" selectedRows.length > 1 ? "s" : ""
}? This will remove all chunks and data associated with these documents. This action cannot be undone.`} }? This will remove all chunks and data associated with these documents. This action cannot be undone.
confirmText="Delete All"
onConfirm={handleBulkDelete} Documents to be deleted:
isLoading={deleteDocumentMutation.isPending} ${selectedRows.map((row) => `${row.filename}`).join("\n")}`}
/> confirmText="Delete All"
</> onConfirm={handleBulkDelete}
); isLoading={deleteDocumentMutation.isPending}
/>
</div>
);
} }
export default function ProtectedSearchPage() { export default function ProtectedSearchPage() {
return ( return (
<ProtectedRoute> <ProtectedRoute>
<SearchPage /> <SearchPage />
</ProtectedRoute> </ProtectedRoute>
); );
} }

View file

@ -1,6 +1,6 @@
"use client" "use client"
import { useState } from 'react' import { useEffect, useState } from 'react'
import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react' import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
@ -8,9 +8,16 @@ import { Badge } from '@/components/ui/badge'
import { useTask, Task } from '@/contexts/task-context' import { useTask, Task } from '@/contexts/task-context'
export function TaskNotificationMenu() { export function TaskNotificationMenu() {
const { tasks, isFetching, isMenuOpen, cancelTask } = useTask() const { tasks, isFetching, isMenuOpen, isRecentTasksExpanded, cancelTask } = useTask()
const [isExpanded, setIsExpanded] = useState(false) const [isExpanded, setIsExpanded] = useState(false)
// Sync local state with context state
useEffect(() => {
if (isRecentTasksExpanded) {
setIsExpanded(true)
}
}, [isRecentTasksExpanded])
// Don't render if menu is closed // Don't render if menu is closed
if (!isMenuOpen) return null if (!isMenuOpen) return null

View file

@ -49,7 +49,9 @@ export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
className || "" className || ""
}`} }`}
> >
{status === "processing" && <AnimatedProcessingIcon className="mr-1.5" />} {status === "processing" && (
<AnimatedProcessingIcon className="text-current shrink-0" />
)}
{config.label} {config.label}
</div> </div>
); );

View file

@ -7,33 +7,18 @@ import {
useCallback, useCallback,
useContext, useContext,
useEffect, useEffect,
useRef,
useState, useState,
} from "react"; } from "react";
import { toast } from "sonner"; import { toast } from "sonner";
import { useCancelTaskMutation } from "@/app/api/mutations/useCancelTaskMutation";
import {
type Task,
useGetTasksQuery,
} from "@/app/api/queries/useGetTasksQuery";
import { useAuth } from "@/contexts/auth-context"; import { useAuth } from "@/contexts/auth-context";
export interface Task { // Task interface is now imported from useGetTasksQuery
task_id: string;
status:
| "pending"
| "running"
| "processing"
| "completed"
| "failed"
| "error";
total_files?: number;
processed_files?: number;
successful_files?: number;
failed_files?: number;
running_files?: number;
pending_files?: number;
created_at: string;
updated_at: string;
duration_seconds?: number;
result?: Record<string, unknown>;
error?: string;
files?: Record<string, Record<string, unknown>>;
}
export interface TaskFile { export interface TaskFile {
filename: string; filename: string;
@ -51,27 +36,54 @@ interface TaskContextType {
files: TaskFile[]; files: TaskFile[];
addTask: (taskId: string) => void; addTask: (taskId: string) => void;
addFiles: (files: Partial<TaskFile>[], taskId: string) => void; addFiles: (files: Partial<TaskFile>[], taskId: string) => void;
removeTask: (taskId: string) => void;
refreshTasks: () => Promise<void>; refreshTasks: () => Promise<void>;
cancelTask: (taskId: string) => Promise<void>; cancelTask: (taskId: string) => Promise<void>;
isPolling: boolean; isPolling: boolean;
isFetching: boolean; isFetching: boolean;
isMenuOpen: boolean; isMenuOpen: boolean;
toggleMenu: () => void; toggleMenu: () => void;
isRecentTasksExpanded: boolean;
setRecentTasksExpanded: (expanded: boolean) => void;
// React Query states
isLoading: boolean;
error: Error | null;
} }
const TaskContext = createContext<TaskContextType | undefined>(undefined); const TaskContext = createContext<TaskContextType | undefined>(undefined);
export function TaskProvider({ children }: { children: React.ReactNode }) { export function TaskProvider({ children }: { children: React.ReactNode }) {
const [tasks, setTasks] = useState<Task[]>([]);
const [files, setFiles] = useState<TaskFile[]>([]); const [files, setFiles] = useState<TaskFile[]>([]);
const [isPolling, setIsPolling] = useState(false);
const [isFetching, setIsFetching] = useState(false);
const [isMenuOpen, setIsMenuOpen] = useState(false); const [isMenuOpen, setIsMenuOpen] = useState(false);
const [isRecentTasksExpanded, setIsRecentTasksExpanded] = useState(false);
const previousTasksRef = useRef<Task[]>([]);
const { isAuthenticated, isNoAuthMode } = useAuth(); const { isAuthenticated, isNoAuthMode } = useAuth();
const queryClient = useQueryClient(); const queryClient = useQueryClient();
// Use React Query hooks
const {
data: tasks = [],
isLoading,
error,
refetch: refetchTasks,
isFetching,
} = useGetTasksQuery({
enabled: isAuthenticated || isNoAuthMode,
});
const cancelTaskMutation = useCancelTaskMutation({
onSuccess: () => {
toast.success("Task cancelled", {
description: "Task has been cancelled successfully",
});
},
onError: (error) => {
toast.error("Failed to cancel task", {
description: error.message,
});
},
});
const refetchSearch = useCallback(() => { const refetchSearch = useCallback(() => {
queryClient.invalidateQueries({ queryClient.invalidateQueries({
queryKey: ["search"], queryKey: ["search"],
@ -99,265 +111,216 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
[], [],
); );
const fetchTasks = useCallback(async () => { // Handle task status changes and file updates
if (!isAuthenticated && !isNoAuthMode) return; useEffect(() => {
if (tasks.length === 0) {
setIsFetching(true); // Store current tasks as previous for next comparison
try { previousTasksRef.current = tasks;
const response = await fetch("/api/tasks"); return;
if (response.ok) {
const data = await response.json();
const newTasks = data.tasks || [];
// Update tasks and check for status changes in the same state update
setTasks((prevTasks) => {
// Check for newly completed tasks to show toasts
if (prevTasks.length > 0) {
newTasks.forEach((newTask: Task) => {
const oldTask = prevTasks.find(
(t) => t.task_id === newTask.task_id,
);
// Update or add files from task.files if available
if (newTask.files && typeof newTask.files === "object") {
const taskFileEntries = Object.entries(newTask.files);
const now = new Date().toISOString();
taskFileEntries.forEach(([filePath, fileInfo]) => {
if (typeof fileInfo === "object" && fileInfo) {
const fileName = filePath.split("/").pop() || filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
let mappedStatus: TaskFile["status"];
switch (fileStatus) {
case "pending":
case "running":
mappedStatus = "processing";
break;
case "completed":
mappedStatus = "active";
break;
case "failed":
mappedStatus = "failed";
break;
default:
mappedStatus = "processing";
}
setFiles((prevFiles) => {
const existingFileIndex = prevFiles.findIndex(
(f) =>
f.source_url === filePath &&
f.task_id === newTask.task_id,
);
// Detect connector type based on file path or other indicators
let connectorType = "local";
if (filePath.includes("/") && !filePath.startsWith("/")) {
// Likely S3 key format (bucket/path/file.ext)
connectorType = "s3";
}
const fileEntry: TaskFile = {
filename: fileName,
mimetype: "", // We don't have this info from the task
source_url: filePath,
size: 0, // We don't have this info from the task
connector_type: connectorType,
status: mappedStatus,
task_id: newTask.task_id,
created_at:
typeof fileInfo.created_at === "string"
? fileInfo.created_at
: now,
updated_at:
typeof fileInfo.updated_at === "string"
? fileInfo.updated_at
: now,
};
if (existingFileIndex >= 0) {
// Update existing file
const updatedFiles = [...prevFiles];
updatedFiles[existingFileIndex] = fileEntry;
return updatedFiles;
} else {
// Add new file
return [...prevFiles, fileEntry];
}
});
}
});
}
if (
oldTask &&
oldTask.status !== "completed" &&
newTask.status === "completed"
) {
// Task just completed - show success toast
toast.success("Task completed successfully", {
description: `Task ${newTask.task_id} has finished processing.`,
action: {
label: "View",
onClick: () => console.log("View task", newTask.task_id),
},
});
refetchSearch();
// Dispatch knowledge updated event for all knowledge-related pages
console.log(
"Task completed successfully, dispatching knowledgeUpdated event",
);
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
// Remove files for this completed task from the files list
setFiles((prevFiles) =>
prevFiles.filter((file) => file.task_id !== newTask.task_id),
);
} else if (
oldTask &&
oldTask.status !== "failed" &&
oldTask.status !== "error" &&
(newTask.status === "failed" || newTask.status === "error")
) {
// Task just failed - show error toast
toast.error("Task failed", {
description: `Task ${newTask.task_id} failed: ${
newTask.error || "Unknown error"
}`,
});
// Files will be updated to failed status by the file parsing logic above
}
});
}
return newTasks;
});
}
} catch (error) {
console.error("Failed to fetch tasks:", error);
} finally {
setIsFetching(false);
} }
}, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop!
const addTask = useCallback((taskId: string) => { // Check for task status changes by comparing with previous tasks
// Immediately start aggressive polling for the new task tasks.forEach((currentTask) => {
let pollAttempts = 0; const previousTask = previousTasksRef.current.find(
const maxPollAttempts = 30; // Poll for up to 30 seconds (prev) => prev.task_id === currentTask.task_id,
);
const aggressivePoll = async () => { // Only show toasts if we have previous data and status has changed
try { if (
const response = await fetch("/api/tasks"); (previousTask && previousTask.status !== currentTask.status) ||
if (response.ok) { (!previousTask && previousTasksRef.current.length !== 0)
const data = await response.json(); ) {
const newTasks = data.tasks || []; // Process files from failed task and add them to files list
const foundTask = newTasks.find( if (currentTask.files && typeof currentTask.files === "object") {
(task: Task) => task.task_id === taskId, const taskFileEntries = Object.entries(currentTask.files);
); const now = new Date().toISOString();
if (foundTask) { taskFileEntries.forEach(([filePath, fileInfo]) => {
// Task found! Update the tasks state if (typeof fileInfo === "object" && fileInfo) {
setTasks((prevTasks) => { // Use the filename from backend if available, otherwise extract from path
// Check if task is already in the list const fileName =
const exists = prevTasks.some((t) => t.task_id === taskId); (fileInfo as any).filename ||
if (!exists) { filePath.split("/").pop() ||
return [...prevTasks, foundTask]; filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
let mappedStatus: TaskFile["status"];
switch (fileStatus) {
case "pending":
case "running":
mappedStatus = "processing";
break;
case "completed":
mappedStatus = "active";
break;
case "failed":
mappedStatus = "failed";
break;
default:
mappedStatus = "processing";
} }
// Update existing task
return prevTasks.map((t) => setFiles((prevFiles) => {
t.task_id === taskId ? foundTask : t, const existingFileIndex = prevFiles.findIndex(
); (f) =>
}); f.source_url === filePath &&
return; // Stop polling, we found it f.task_id === currentTask.task_id,
} );
// Detect connector type based on file path or other indicators
let connectorType = "local";
if (filePath.includes("/") && !filePath.startsWith("/")) {
// Likely S3 key format (bucket/path/file.ext)
connectorType = "s3";
}
const fileEntry: TaskFile = {
filename: fileName,
mimetype: "", // We don't have this info from the task
source_url: filePath,
size: 0, // We don't have this info from the task
connector_type: connectorType,
status: mappedStatus,
task_id: currentTask.task_id,
created_at:
typeof fileInfo.created_at === "string"
? fileInfo.created_at
: now,
updated_at:
typeof fileInfo.updated_at === "string"
? fileInfo.updated_at
: now,
};
if (existingFileIndex >= 0) {
// Update existing file
const updatedFiles = [...prevFiles];
updatedFiles[existingFileIndex] = fileEntry;
return updatedFiles;
} else {
// Add new file
return [...prevFiles, fileEntry];
}
});
}
});
} }
} catch (error) { if (
console.error("Aggressive polling failed:", error); previousTask &&
} previousTask.status !== "completed" &&
currentTask.status === "completed"
) {
// Task just completed - show success toast with file counts
const successfulFiles = currentTask.successful_files || 0;
const failedFiles = currentTask.failed_files || 0;
pollAttempts++; let description = "";
if (pollAttempts < maxPollAttempts) { if (failedFiles > 0) {
// Continue polling every 1 second for new tasks description = `${successfulFiles} file${
setTimeout(aggressivePoll, 1000); successfulFiles !== 1 ? "s" : ""
} } uploaded successfully, ${failedFiles} file${
}; failedFiles !== 1 ? "s" : ""
} failed`;
} else {
description = `${successfulFiles} file${
successfulFiles !== 1 ? "s" : ""
} uploaded successfully`;
}
// Start aggressive polling after a short delay to allow backend to process toast.success("Task completed", {
setTimeout(aggressivePoll, 500); description,
}, []); action: {
label: "View",
onClick: () => {
setIsMenuOpen(true);
setIsRecentTasksExpanded(true);
},
},
});
setTimeout(() => {
setFiles((prevFiles) =>
prevFiles.filter(
(file) =>
file.task_id !== currentTask.task_id ||
file.status === "failed",
),
);
refetchSearch();
}, 500);
} else if (
previousTask &&
previousTask.status !== "failed" &&
previousTask.status !== "error" &&
(currentTask.status === "failed" || currentTask.status === "error")
) {
// Task just failed - show error toast
toast.error("Task failed", {
description: `Task ${currentTask.task_id} failed: ${
currentTask.error || "Unknown error"
}`,
});
}
}
});
// Store current tasks as previous for next comparison
previousTasksRef.current = tasks;
}, [tasks, refetchSearch]);
const addTask = useCallback(
(_taskId: string) => {
// React Query will automatically handle polling when tasks are active
// Just trigger a refetch to get the latest data
setTimeout(() => {
refetchTasks();
}, 500);
},
[refetchTasks],
);
const refreshTasks = useCallback(async () => { const refreshTasks = useCallback(async () => {
await fetchTasks(); setFiles([]);
}, [fetchTasks]); await refetchTasks();
}, [refetchTasks]);
const removeTask = useCallback((taskId: string) => {
setTasks((prev) => prev.filter((task) => task.task_id !== taskId));
}, []);
const cancelTask = useCallback( const cancelTask = useCallback(
async (taskId: string) => { async (taskId: string) => {
try { cancelTaskMutation.mutate({ taskId });
const response = await fetch(`/api/tasks/${taskId}/cancel`, {
method: "POST",
});
if (response.ok) {
// Immediately refresh tasks to show the updated status
await fetchTasks();
toast.success("Task cancelled", {
description: `Task ${taskId.substring(0, 8)}... has been cancelled`,
});
} else {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.error || "Failed to cancel task");
}
} catch (error) {
console.error("Failed to cancel task:", error);
toast.error("Failed to cancel task", {
description: error instanceof Error ? error.message : "Unknown error",
});
}
}, },
[fetchTasks], [cancelTaskMutation],
); );
const toggleMenu = useCallback(() => { const toggleMenu = useCallback(() => {
setIsMenuOpen((prev) => !prev); setIsMenuOpen((prev) => !prev);
}, []); }, []);
// Periodic polling for task updates // Determine if we're polling based on React Query's refetch interval
useEffect(() => { const isPolling =
if (!isAuthenticated && !isNoAuthMode) return; isFetching &&
tasks.some(
setIsPolling(true); (task) =>
task.status === "pending" ||
// Initial fetch task.status === "running" ||
fetchTasks(); task.status === "processing",
);
// Set up polling interval - every 3 seconds (more responsive for active tasks)
const interval = setInterval(fetchTasks, 3000);
return () => {
clearInterval(interval);
setIsPolling(false);
};
}, [isAuthenticated, isNoAuthMode, fetchTasks]);
const value: TaskContextType = { const value: TaskContextType = {
tasks, tasks,
files, files,
addTask, addTask,
addFiles, addFiles,
removeTask,
refreshTasks, refreshTasks,
cancelTask, cancelTask,
isPolling, isPolling,
isFetching, isFetching,
isMenuOpen, isMenuOpen,
toggleMenu, toggleMenu,
isRecentTasksExpanded,
setRecentTasksExpanded: setIsRecentTasksExpanded,
isLoading,
error,
}; };
return <TaskContext.Provider value={value}>{children}</TaskContext.Provider>; return <TaskContext.Provider value={value}>{children}</TaskContext.Provider>;

View file

@ -6,14 +6,13 @@ from config.settings import INDEX_NAME
logger = get_logger(__name__) logger = get_logger(__name__)
async def delete_documents_by_filename(request: Request, document_service, session_manager): async def check_filename_exists(request: Request, document_service, session_manager):
"""Delete all documents with a specific filename""" """Check if a document with a specific filename already exists"""
data = await request.json() filename = request.query_params.get("filename")
filename = data.get("filename")
if not filename: if not filename:
return JSONResponse({"error": "filename is required"}, status_code=400) return JSONResponse({"error": "filename parameter is required"}, status_code=400)
user = request.state.user user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token) jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
@ -22,34 +21,79 @@ async def delete_documents_by_filename(request: Request, document_service, sessi
opensearch_client = session_manager.get_user_opensearch_client( opensearch_client = session_manager.get_user_opensearch_client(
user.user_id, jwt_token user.user_id, jwt_token
) )
# Search for any document with this exact filename
from utils.opensearch_queries import build_filename_search_body
search_body = build_filename_search_body(filename, size=1, source=["filename"])
logger.debug(f"Checking filename existence: {filename}")
response = await opensearch_client.search(
index=INDEX_NAME,
body=search_body
)
# Check if any hits were found
hits = response.get("hits", {}).get("hits", [])
exists = len(hits) > 0
logger.debug(f"Filename check result - exists: {exists}, hits: {len(hits)}")
return JSONResponse({
"exists": exists,
"filename": filename
}, status_code=200)
except Exception as e:
logger.error("Error checking filename existence", filename=filename, error=str(e))
error_str = str(e)
if "AuthenticationException" in error_str:
return JSONResponse({"error": "Access denied: insufficient permissions"}, status_code=403)
else:
return JSONResponse({"error": str(e)}, status_code=500)
async def delete_documents_by_filename(request: Request, document_service, session_manager):
"""Delete all documents with a specific filename"""
data = await request.json()
filename = data.get("filename")
if not filename:
return JSONResponse({"error": "filename is required"}, status_code=400)
user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
try:
# Get user's OpenSearch client
opensearch_client = session_manager.get_user_opensearch_client(
user.user_id, jwt_token
)
# Delete by query to remove all chunks of this document # Delete by query to remove all chunks of this document
delete_query = { from utils.opensearch_queries import build_filename_delete_body
"query": {
"bool": { delete_query = build_filename_delete_body(filename)
"must": [
{"term": {"filename": filename}} logger.debug(f"Deleting documents with filename: {filename}")
]
}
}
}
result = await opensearch_client.delete_by_query( result = await opensearch_client.delete_by_query(
index=INDEX_NAME, index=INDEX_NAME,
body=delete_query, body=delete_query,
conflicts="proceed" conflicts="proceed"
) )
deleted_count = result.get("deleted", 0) deleted_count = result.get("deleted", 0)
logger.info(f"Deleted {deleted_count} chunks for filename {filename}", user_id=user.user_id) logger.info(f"Deleted {deleted_count} chunks for filename {filename}", user_id=user.user_id)
return JSONResponse({ return JSONResponse({
"success": True, "success": True,
"deleted_chunks": deleted_count, "deleted_chunks": deleted_count,
"filename": filename, "filename": filename,
"message": f"All documents with filename '{filename}' deleted successfully" "message": f"All documents with filename '{filename}' deleted successfully"
}, status_code=200) }, status_code=200)
except Exception as e: except Exception as e:
logger.error("Error deleting documents by filename", filename=filename, error=str(e)) logger.error("Error deleting documents by filename", filename=filename, error=str(e))
error_str = str(e) error_str = str(e)

View file

@ -189,19 +189,20 @@ async def upload_and_ingest_user_file(
# Create temporary file for task processing # Create temporary file for task processing
import tempfile import tempfile
import os import os
# Read file content # Read file content
content = await upload_file.read() content = await upload_file.read()
# Create temporary file # Create temporary file with the actual filename (not a temp prefix)
# Store in temp directory but use the real filename
temp_dir = tempfile.gettempdir()
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
temp_fd, temp_path = tempfile.mkstemp( temp_path = os.path.join(temp_dir, safe_filename)
suffix=f"_{safe_filename}"
)
try: try:
# Write content to temp file # Write content to temp file
with os.fdopen(temp_fd, 'wb') as temp_file: with open(temp_path, 'wb') as temp_file:
temp_file.write(content) temp_file.write(content)
logger.debug("Created temporary file for task processing", temp_path=temp_path) logger.debug("Created temporary file for task processing", temp_path=temp_path)

View file

@ -13,27 +13,27 @@ logger = get_logger(__name__)
async def upload_ingest_router( async def upload_ingest_router(
request: Request, request: Request,
document_service=None, document_service=None,
langflow_file_service=None, langflow_file_service=None,
session_manager=None, session_manager=None,
task_service=None task_service=None,
): ):
""" """
Router endpoint that automatically routes upload requests based on configuration. Router endpoint that automatically routes upload requests based on configuration.
- If DISABLE_INGEST_WITH_LANGFLOW is True: uses traditional OpenRAG upload (/upload) - If DISABLE_INGEST_WITH_LANGFLOW is True: uses traditional OpenRAG upload (/upload)
- If DISABLE_INGEST_WITH_LANGFLOW is False (default): uses Langflow upload-ingest via task service - If DISABLE_INGEST_WITH_LANGFLOW is False (default): uses Langflow upload-ingest via task service
This provides a single endpoint that users can call regardless of backend configuration. This provides a single endpoint that users can call regardless of backend configuration.
All langflow uploads are processed as background tasks for better scalability. All langflow uploads are processed as background tasks for better scalability.
""" """
try: try:
logger.debug( logger.debug(
"Router upload_ingest endpoint called", "Router upload_ingest endpoint called",
disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW,
) )
# Route based on configuration # Route based on configuration
if DISABLE_INGEST_WITH_LANGFLOW: if DISABLE_INGEST_WITH_LANGFLOW:
# Route to traditional OpenRAG upload # Route to traditional OpenRAG upload
@ -42,8 +42,10 @@ async def upload_ingest_router(
else: else:
# Route to Langflow upload and ingest using task service # Route to Langflow upload and ingest using task service
logger.debug("Routing to Langflow upload-ingest pipeline via task service") logger.debug("Routing to Langflow upload-ingest pipeline via task service")
return await langflow_upload_ingest_task(request, langflow_file_service, session_manager, task_service) return await langflow_upload_ingest_task(
request, langflow_file_service, session_manager, task_service
)
except Exception as e: except Exception as e:
logger.error("Error in upload_ingest_router", error=str(e)) logger.error("Error in upload_ingest_router", error=str(e))
error_msg = str(e) error_msg = str(e)
@ -57,17 +59,14 @@ async def upload_ingest_router(
async def langflow_upload_ingest_task( async def langflow_upload_ingest_task(
request: Request, request: Request, langflow_file_service, session_manager, task_service
langflow_file_service,
session_manager,
task_service
): ):
"""Task-based langflow upload and ingest for single/multiple files""" """Task-based langflow upload and ingest for single/multiple files"""
try: try:
logger.debug("Task-based langflow upload_ingest endpoint called") logger.debug("Task-based langflow upload_ingest endpoint called")
form = await request.form() form = await request.form()
upload_files = form.getlist("file") upload_files = form.getlist("file")
if not upload_files or len(upload_files) == 0: if not upload_files or len(upload_files) == 0:
logger.error("No files provided in task-based upload request") logger.error("No files provided in task-based upload request")
return JSONResponse({"error": "Missing files"}, status_code=400) return JSONResponse({"error": "Missing files"}, status_code=400)
@ -77,14 +76,16 @@ async def langflow_upload_ingest_task(
settings_json = form.get("settings") settings_json = form.get("settings")
tweaks_json = form.get("tweaks") tweaks_json = form.get("tweaks")
delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true" delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true"
replace_duplicates = form.get("replace_duplicates", "false").lower() == "true"
# Parse JSON fields if provided # Parse JSON fields if provided
settings = None settings = None
tweaks = None tweaks = None
if settings_json: if settings_json:
try: try:
import json import json
settings = json.loads(settings_json) settings = json.loads(settings_json)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.error("Invalid settings JSON", error=str(e)) logger.error("Invalid settings JSON", error=str(e))
@ -93,6 +94,7 @@ async def langflow_upload_ingest_task(
if tweaks_json: if tweaks_json:
try: try:
import json import json
tweaks = json.loads(tweaks_json) tweaks = json.loads(tweaks_json)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.error("Invalid tweaks JSON", error=str(e)) logger.error("Invalid tweaks JSON", error=str(e))
@ -106,28 +108,37 @@ async def langflow_upload_ingest_task(
jwt_token = getattr(request.state, "jwt_token", None) jwt_token = getattr(request.state, "jwt_token", None)
if not user_id: if not user_id:
return JSONResponse({"error": "User authentication required"}, status_code=401) return JSONResponse(
{"error": "User authentication required"}, status_code=401
)
# Create temporary files for task processing # Create temporary files for task processing
import tempfile import tempfile
import os import os
temp_file_paths = [] temp_file_paths = []
original_filenames = []
try: try:
# Create temp directory reference once
temp_dir = tempfile.gettempdir()
for upload_file in upload_files: for upload_file in upload_files:
# Read file content # Read file content
content = await upload_file.read() content = await upload_file.read()
# Create temporary file # Store ORIGINAL filename (not transformed)
original_filenames.append(upload_file.filename)
# Create temporary file with TRANSFORMED filename for filesystem safety
# Transform: spaces and / to underscore
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
temp_fd, temp_path = tempfile.mkstemp( temp_path = os.path.join(temp_dir, safe_filename)
suffix=f"_{safe_filename}"
)
# Write content to temp file # Write content to temp file
with os.fdopen(temp_fd, 'wb') as temp_file: with open(temp_path, "wb") as temp_file:
temp_file.write(content) temp_file.write(content)
temp_file_paths.append(temp_path) temp_file_paths.append(temp_path)
logger.debug( logger.debug(
@ -136,21 +147,22 @@ async def langflow_upload_ingest_task(
user_id=user_id, user_id=user_id,
has_settings=bool(settings), has_settings=bool(settings),
has_tweaks=bool(tweaks), has_tweaks=bool(tweaks),
delete_after_ingest=delete_after_ingest delete_after_ingest=delete_after_ingest,
) )
# Create langflow upload task # Create langflow upload task
print(f"tweaks: {tweaks}") logger.debug(
print(f"settings: {settings}") f"Preparing to create langflow upload task: tweaks={tweaks}, settings={settings}, jwt_token={jwt_token}, user_name={user_name}, user_email={user_email}, session_id={session_id}, delete_after_ingest={delete_after_ingest}, temp_file_paths={temp_file_paths}",
print(f"jwt_token: {jwt_token}") )
print(f"user_name: {user_name}") # Create a map between temp_file_paths and original_filenames
print(f"user_email: {user_email}") file_path_to_original_filename = dict(zip(temp_file_paths, original_filenames))
print(f"session_id: {session_id}") logger.debug(
print(f"delete_after_ingest: {delete_after_ingest}") f"File path to original filename map: {file_path_to_original_filename}",
print(f"temp_file_paths: {temp_file_paths}") )
task_id = await task_service.create_langflow_upload_task( task_id = await task_service.create_langflow_upload_task(
user_id=user_id, user_id=user_id,
file_paths=temp_file_paths, file_paths=temp_file_paths,
original_filenames=file_path_to_original_filename,
langflow_file_service=langflow_file_service, langflow_file_service=langflow_file_service,
session_manager=session_manager, session_manager=session_manager,
jwt_token=jwt_token, jwt_token=jwt_token,
@ -160,23 +172,28 @@ async def langflow_upload_ingest_task(
tweaks=tweaks, tweaks=tweaks,
settings=settings, settings=settings,
delete_after_ingest=delete_after_ingest, delete_after_ingest=delete_after_ingest,
replace_duplicates=replace_duplicates,
) )
logger.debug("Langflow upload task created successfully", task_id=task_id) logger.debug("Langflow upload task created successfully", task_id=task_id)
return JSONResponse({ return JSONResponse(
"task_id": task_id, {
"message": f"Langflow upload task created for {len(upload_files)} file(s)", "task_id": task_id,
"file_count": len(upload_files) "message": f"Langflow upload task created for {len(upload_files)} file(s)",
}, status_code=202) # 202 Accepted for async processing "file_count": len(upload_files),
},
status_code=202,
) # 202 Accepted for async processing
except Exception: except Exception:
# Clean up temp files on error # Clean up temp files on error
from utils.file_utils import safe_unlink from utils.file_utils import safe_unlink
for temp_path in temp_file_paths: for temp_path in temp_file_paths:
safe_unlink(temp_path) safe_unlink(temp_path)
raise raise
except Exception as e: except Exception as e:
logger.error( logger.error(
"Task-based langflow upload_ingest endpoint failed", "Task-based langflow upload_ingest endpoint failed",
@ -184,5 +201,6 @@ async def langflow_upload_ingest_task(
error=str(e), error=str(e),
) )
import traceback import traceback
logger.error("Full traceback", traceback=traceback.format_exc()) logger.error("Full traceback", traceback=traceback.format_exc())
return JSONResponse({"error": str(e)}, status_code=500) return JSONResponse({"error": str(e)}, status_code=500)

View file

@ -953,6 +953,17 @@ async def create_app():
methods=["POST", "GET"], methods=["POST", "GET"],
), ),
# Document endpoints # Document endpoints
Route(
"/documents/check-filename",
require_auth(services["session_manager"])(
partial(
documents.check_filename_exists,
document_service=services["document_service"],
session_manager=services["session_manager"],
)
),
methods=["GET"],
),
Route( Route(
"/documents/delete-by-filename", "/documents/delete-by-filename",
require_auth(services["session_manager"])( require_auth(services["session_manager"])(

View file

@ -55,6 +55,96 @@ class TaskProcessor:
await asyncio.sleep(retry_delay) await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff retry_delay *= 2 # Exponential backoff
async def check_filename_exists(
    self,
    filename: str,
    opensearch_client,
) -> bool:
    """
    Check if a document with the given filename already exists in OpenSearch.

    Issues a size-1 search (with ``_source`` disabled) against ``INDEX_NAME``
    and retries on failure: up to 3 attempts, sleeping 1s and then 2s
    between them.

    Args:
        filename: The exact filename to look for.
        opensearch_client: Client whose awaitable ``search(index=..., body=...)``
            returns a dict-like response.

    Returns:
        True if the search returns at least one hit. False if there are no
        hits, or if every attempt raised; in that case the failure is
        logged and swallowed rather than propagated.
    """
    from config.settings import INDEX_NAME
    from utils.opensearch_queries import build_filename_search_body
    import asyncio

    max_retries = 3
    retry_delay = 1.0

    for attempt in range(max_retries):
        try:
            # Search for any document with this exact filename
            search_body = build_filename_search_body(filename, size=1, source=False)

            response = await opensearch_client.search(
                index=INDEX_NAME,
                body=search_body
            )

            # Check if any hits were found
            hits = response.get("hits", {}).get("hits", [])
            return len(hits) > 0

        # Exception already covers asyncio.TimeoutError, so listing both was
        # an overlapping (redundant) handler.
        except Exception as e:
            if attempt == max_retries - 1:
                logger.error(
                    "OpenSearch filename check failed after retries",
                    filename=filename,
                    error=str(e),
                    attempt=attempt + 1
                )
                # On final failure, assume document doesn't exist (safer to reprocess than skip)
                logger.warning(
                    "Assuming filename doesn't exist due to connection issues",
                    filename=filename
                )
                return False
            else:
                logger.warning(
                    "OpenSearch filename check failed, retrying",
                    filename=filename,
                    error=str(e),
                    attempt=attempt + 1,
                    retry_in=retry_delay
                )
                await asyncio.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff
async def delete_document_by_filename(
    self,
    filename: str,
    opensearch_client,
) -> None:
    """
    Remove every chunk indexed under ``filename`` via a delete-by-query.

    Args:
        filename: The exact filename whose chunks should be deleted.
        opensearch_client: Client whose awaitable
            ``delete_by_query(index=..., body=...)`` returns a dict-like response.

    Raises:
        Exception: Any error raised while deleting is logged and re-raised
            unchanged.
    """
    from config.settings import INDEX_NAME
    from utils.opensearch_queries import build_filename_delete_body

    try:
        # One request removes all documents matching the filename query.
        result = await opensearch_client.delete_by_query(
            index=INDEX_NAME,
            body=build_filename_delete_body(filename),
        )
        logger.info(
            "Deleted existing document chunks",
            filename=filename,
            deleted_count=result.get("deleted", 0),
        )
    except Exception as exc:
        logger.error(
            "Failed to delete existing document",
            filename=filename,
            error=str(exc),
        )
        raise
async def process_document_standard( async def process_document_standard(
self, self,
file_path: str, file_path: str,
@ -527,6 +617,7 @@ class LangflowFileProcessor(TaskProcessor):
tweaks: dict = None, tweaks: dict = None,
settings: dict = None, settings: dict = None,
delete_after_ingest: bool = True, delete_after_ingest: bool = True,
replace_duplicates: bool = False,
): ):
super().__init__() super().__init__()
self.langflow_file_service = langflow_file_service self.langflow_file_service = langflow_file_service
@ -539,6 +630,7 @@ class LangflowFileProcessor(TaskProcessor):
self.tweaks = tweaks or {} self.tweaks = tweaks or {}
self.settings = settings self.settings = settings
self.delete_after_ingest = delete_after_ingest self.delete_after_ingest = delete_after_ingest
self.replace_duplicates = replace_duplicates
async def process_item( async def process_item(
self, upload_task: UploadTask, item: str, file_task: FileTask self, upload_task: UploadTask, item: str, file_task: FileTask
@ -554,37 +646,40 @@ class LangflowFileProcessor(TaskProcessor):
file_task.updated_at = time.time() file_task.updated_at = time.time()
try: try:
# Compute hash and check if already exists # Use the ORIGINAL filename stored in file_task (not the transformed temp path)
from utils.hash_utils import hash_id # This ensures we check/store the original filename with spaces, etc.
file_hash = hash_id(item) original_filename = file_task.filename or os.path.basename(item)
# Check if document already exists # Check if document with same filename already exists
opensearch_client = self.session_manager.get_user_opensearch_client( opensearch_client = self.session_manager.get_user_opensearch_client(
self.owner_user_id, self.jwt_token self.owner_user_id, self.jwt_token
) )
if await self.check_document_exists(file_hash, opensearch_client):
file_task.status = TaskStatus.COMPLETED filename_exists = await self.check_filename_exists(original_filename, opensearch_client)
file_task.result = {"status": "unchanged", "id": file_hash}
if filename_exists and not self.replace_duplicates:
# Duplicate exists and user hasn't confirmed replacement
file_task.status = TaskStatus.FAILED
file_task.error = f"File with name '{original_filename}' already exists"
file_task.updated_at = time.time() file_task.updated_at = time.time()
upload_task.successful_files += 1 upload_task.failed_files += 1
return return
elif filename_exists and self.replace_duplicates:
# Delete existing document before uploading new one
logger.info(f"Replacing existing document: {original_filename}")
await self.delete_document_by_filename(original_filename, opensearch_client)
# Read file content for processing # Read file content for processing
with open(item, 'rb') as f: with open(item, 'rb') as f:
content = f.read() content = f.read()
# Create file tuple for upload # Create file tuple for upload using ORIGINAL filename
temp_filename = os.path.basename(item) # This ensures the document is indexed with the original name
# Extract original filename from temp file suffix (remove tmp prefix) content_type, _ = mimetypes.guess_type(original_filename)
if "_" in temp_filename:
filename = temp_filename.split("_", 1)[1] # Get everything after first _
else:
filename = temp_filename
content_type, _ = mimetypes.guess_type(filename)
if not content_type: if not content_type:
content_type = 'application/octet-stream' content_type = 'application/octet-stream'
file_tuple = (filename, content, content_type) file_tuple = (original_filename, content, content_type)
# Get JWT token using same logic as DocumentFileProcessor # Get JWT token using same logic as DocumentFileProcessor
# This will handle anonymous JWT creation if needed # This will handle anonymous JWT creation if needed

View file

@ -20,7 +20,8 @@ class FileTask:
retry_count: int = 0 retry_count: int = 0
created_at: float = field(default_factory=time.time) created_at: float = field(default_factory=time.time)
updated_at: float = field(default_factory=time.time) updated_at: float = field(default_factory=time.time)
filename: Optional[str] = None # Original filename for display
@property @property
def duration_seconds(self) -> float: def duration_seconds(self) -> float:
"""Duration in seconds from creation to last update""" """Duration in seconds from creation to last update"""

View file

@ -1,6 +1,5 @@
import asyncio import asyncio
import random import random
from typing import Dict, Optional
import time import time
import uuid import uuid
@ -59,6 +58,7 @@ class TaskService:
file_paths: list, file_paths: list,
langflow_file_service, langflow_file_service,
session_manager, session_manager,
original_filenames: dict | None = None,
jwt_token: str = None, jwt_token: str = None,
owner_name: str = None, owner_name: str = None,
owner_email: str = None, owner_email: str = None,
@ -66,6 +66,7 @@ class TaskService:
tweaks: dict = None, tweaks: dict = None,
settings: dict = None, settings: dict = None,
delete_after_ingest: bool = True, delete_after_ingest: bool = True,
replace_duplicates: bool = False,
) -> str: ) -> str:
"""Create a new upload task for Langflow file processing with upload and ingest""" """Create a new upload task for Langflow file processing with upload and ingest"""
# Use LangflowFileProcessor with user context # Use LangflowFileProcessor with user context
@ -82,18 +83,35 @@ class TaskService:
tweaks=tweaks, tweaks=tweaks,
settings=settings, settings=settings,
delete_after_ingest=delete_after_ingest, delete_after_ingest=delete_after_ingest,
replace_duplicates=replace_duplicates,
) )
return await self.create_custom_task(user_id, file_paths, processor) return await self.create_custom_task(user_id, file_paths, processor, original_filenames)
async def create_custom_task(self, user_id: str, items: list, processor) -> str: async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: dict | None = None) -> str:
"""Create a new task with custom processor for any type of items""" """Create a new task with custom processor for any type of items"""
import os
# Store anonymous tasks under a stable key so they can be retrieved later # Store anonymous tasks under a stable key so they can be retrieved later
store_user_id = user_id or AnonymousUser().user_id store_user_id = user_id or AnonymousUser().user_id
task_id = str(uuid.uuid4()) task_id = str(uuid.uuid4())
# Create file tasks with original filenames if provided
normalized_originals = (
{str(k): v for k, v in original_filenames.items()} if original_filenames else {}
)
file_tasks = {
str(item): FileTask(
file_path=str(item),
filename=normalized_originals.get(
str(item), os.path.basename(str(item))
),
)
for item in items
}
upload_task = UploadTask( upload_task = UploadTask(
task_id=task_id, task_id=task_id,
total_files=len(items), total_files=len(items),
file_tasks={str(item): FileTask(file_path=str(item)) for item in items}, file_tasks=file_tasks,
) )
# Attach the custom processor to the task # Attach the custom processor to the task
@ -268,6 +286,7 @@ class TaskService:
"created_at": file_task.created_at, "created_at": file_task.created_at,
"updated_at": file_task.updated_at, "updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds, "duration_seconds": file_task.duration_seconds,
"filename": file_task.filename,
} }
# Count running and pending files # Count running and pending files
@ -322,6 +341,7 @@ class TaskService:
"created_at": file_task.created_at, "created_at": file_task.created_at,
"updated_at": file_task.updated_at, "updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds, "duration_seconds": file_task.duration_seconds,
"filename": file_task.filename,
} }
if file_task.status.value == "running": if file_task.status.value == "running":

View file

@ -0,0 +1,55 @@
"""
Utility functions for constructing OpenSearch queries consistently.
"""
from typing import Union, List
def build_filename_query(filename: str) -> dict:
    """
    Produce the query clause used to look documents up by filename.

    Args:
        filename: The exact filename to match.

    Returns:
        A ``{"term": {"filename": <filename>}}`` dict.
    """
    term_clause = {"filename": filename}
    return {"term": term_clause}
def build_filename_search_body(filename: str, size: int = 1, source: Union[bool, List[str]] = False) -> dict:
    """
    Assemble a full search body for a filename lookup.

    Args:
        filename: The exact filename to match.
        size: Value placed under the "size" key (default: 1).
        source: Value placed under the "_source" key; either a bool or a
            list of field names (default: False).

    Returns:
        A dict with the keys "query", "size" and "_source", in that order.
    """
    body = {"query": build_filename_query(filename)}
    body["size"] = size
    body["_source"] = source
    return body
def build_filename_delete_body(filename: str) -> dict:
    """
    Assemble the delete-by-query body for a filename.

    Args:
        filename: The exact filename whose documents should be matched.

    Returns:
        A dict holding the filename query under the single key "query".
    """
    filename_query = build_filename_query(filename)
    return {"query": filename_query}