Merge branch 'main' into pre-release-fixes

This commit is contained in:
Sebastián Estévez 2025-10-06 14:47:56 -04:00 committed by GitHub
commit baa779f690
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 1882 additions and 1299 deletions

View file

@ -0,0 +1,66 @@
"use client";
import { RotateCcw } from "lucide-react";
import type React from "react";
import { Button } from "./ui/button";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "./ui/dialog";
interface DuplicateHandlingDialogProps {
open: boolean;
onOpenChange: (open: boolean) => void;
onOverwrite: () => void | Promise<void>;
isLoading?: boolean;
}
export const DuplicateHandlingDialog: React.FC<
DuplicateHandlingDialogProps
> = ({ open, onOpenChange, onOverwrite, isLoading = false }) => {
const handleOverwrite = async () => {
await onOverwrite();
onOpenChange(false);
};
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-[450px]">
<DialogHeader>
<DialogTitle>Overwrite document</DialogTitle>
<DialogDescription className="pt-2 text-muted-foreground">
Overwriting will replace the existing document with another version.
This can't be undone.
</DialogDescription>
</DialogHeader>
<DialogFooter className="flex-row gap-2 justify-end">
<Button
type="button"
variant="ghost"
onClick={() => onOpenChange(false)}
disabled={isLoading}
size="sm"
>
Cancel
</Button>
<Button
type="button"
variant="default"
size="sm"
onClick={handleOverwrite}
disabled={isLoading}
className="flex items-center gap-2 !bg-accent-amber-foreground hover:!bg-foreground text-primary-foreground"
>
<RotateCcw className="h-3.5 w-3.5" />
Overwrite
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
);
};

File diff suppressed because it is too large Load diff

View file

@ -9,7 +9,7 @@ export default function IBMLogo(props: React.SVGProps<SVGSVGElement>) {
{...props} {...props}
> >
<title>IBM watsonx.ai Logo</title> <title>IBM watsonx.ai Logo</title>
<g clip-path="url(#clip0_2620_2081)"> <g clipPath="url(#clip0_2620_2081)">
<path <path
d="M13 12.0007C12.4477 12.0007 12 12.4484 12 13.0007C12 13.0389 12.0071 13.0751 12.0112 13.1122C10.8708 14.0103 9.47165 14.5007 8 14.5007C5.86915 14.5007 4 12.5146 4 10.2507C4 7.90722 5.9065 6.00072 8.25 6.00072H8.5V5.00072H8.25C5.3552 5.00072 3 7.35592 3 10.2507C3 11.1927 3.2652 12.0955 3.71855 12.879C2.3619 11.6868 1.5 9.94447 1.5 8.00072C1.5 6.94312 1.74585 5.93432 2.23095 5.00292L1.34375 4.54102C0.79175 5.60157 0.5 6.79787 0.5 8.00072C0.5 12.1362 3.8645 15.5007 8 15.5007C9.6872 15.5007 11.2909 14.9411 12.6024 13.9176C12.7244 13.9706 12.8586 14.0007 13 14.0007C13.5523 14.0007 14 13.553 14 13.0007C14 12.4484 13.5523 12.0007 13 12.0007Z" d="M13 12.0007C12.4477 12.0007 12 12.4484 12 13.0007C12 13.0389 12.0071 13.0751 12.0112 13.1122C10.8708 14.0103 9.47165 14.5007 8 14.5007C5.86915 14.5007 4 12.5146 4 10.2507C4 7.90722 5.9065 6.00072 8.25 6.00072H8.5V5.00072H8.25C5.3552 5.00072 3 7.35592 3 10.2507C3 11.1927 3.2652 12.0955 3.71855 12.879C2.3619 11.6868 1.5 9.94447 1.5 8.00072C1.5 6.94312 1.74585 5.93432 2.23095 5.00292L1.34375 4.54102C0.79175 5.60157 0.5 6.79787 0.5 8.00072C0.5 12.1362 3.8645 15.5007 8 15.5007C9.6872 15.5007 11.2909 14.9411 12.6024 13.9176C12.7244 13.9706 12.8586 14.0007 13 14.0007C13.5523 14.0007 14 13.553 14 13.0007C14 12.4484 13.5523 12.0007 13 12.0007Z"
fill="currentColor" fill="currentColor"

View file

@ -44,7 +44,7 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
placeholder={placeholder} placeholder={placeholder}
className={cn( className={cn(
"primary-input", "primary-input",
icon && "pl-9", icon && "!pl-9",
type === "password" && "!pr-8", type === "password" && "!pr-8",
icon ? inputClassName : className icon ? inputClassName : className
)} )}

View file

@ -0,0 +1,47 @@
import {
type UseMutationOptions,
useMutation,
useQueryClient,
} from "@tanstack/react-query";
export interface CancelTaskRequest {
taskId: string;
}
export interface CancelTaskResponse {
status: string;
task_id: string;
}
export const useCancelTaskMutation = (
options?: Omit<
UseMutationOptions<CancelTaskResponse, Error, CancelTaskRequest>,
"mutationFn"
>
) => {
const queryClient = useQueryClient();
async function cancelTask(
variables: CancelTaskRequest,
): Promise<CancelTaskResponse> {
const response = await fetch(`/api/tasks/${variables.taskId}/cancel`, {
method: "POST",
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.error || "Failed to cancel task");
}
return response.json();
}
return useMutation({
mutationFn: cancelTask,
onSuccess: () => {
// Invalidate tasks query to refresh the list
queryClient.invalidateQueries({ queryKey: ["tasks"] });
},
...options,
});
};

View file

@ -29,6 +29,7 @@ export interface ChunkResult {
owner_email?: string; owner_email?: string;
file_size?: number; file_size?: number;
connector_type?: string; connector_type?: string;
index?: number;
} }
export interface File { export interface File {
@ -55,7 +56,7 @@ export interface File {
export const useGetSearchQuery = ( export const useGetSearchQuery = (
query: string, query: string,
queryData?: ParsedQueryData | null, queryData?: ParsedQueryData | null,
options?: Omit<UseQueryOptions, "queryKey" | "queryFn">, options?: Omit<UseQueryOptions, "queryKey" | "queryFn">
) => { ) => {
const queryClient = useQueryClient(); const queryClient = useQueryClient();
@ -184,7 +185,7 @@ export const useGetSearchQuery = (
queryFn: getFiles, queryFn: getFiles,
...options, ...options,
}, },
queryClient, queryClient
); );
return queryResult; return queryResult;

View file

@ -0,0 +1,79 @@
import {
type UseQueryOptions,
useQuery,
useQueryClient,
} from "@tanstack/react-query";
export interface Task {
task_id: string;
status:
| "pending"
| "running"
| "processing"
| "completed"
| "failed"
| "error";
total_files?: number;
processed_files?: number;
successful_files?: number;
failed_files?: number;
running_files?: number;
pending_files?: number;
created_at: string;
updated_at: string;
duration_seconds?: number;
result?: Record<string, unknown>;
error?: string;
files?: Record<string, Record<string, unknown>>;
}
export interface TasksResponse {
tasks: Task[];
}
export const useGetTasksQuery = (
options?: Omit<UseQueryOptions<Task[]>, "queryKey" | "queryFn">
) => {
const queryClient = useQueryClient();
async function getTasks(): Promise<Task[]> {
const response = await fetch("/api/tasks");
if (!response.ok) {
throw new Error("Failed to fetch tasks");
}
const data: TasksResponse = await response.json();
return data.tasks || [];
}
const queryResult = useQuery(
{
queryKey: ["tasks"],
queryFn: getTasks,
refetchInterval: (query) => {
// Only poll if there are tasks with pending or running status
const data = query.state.data;
if (!data || data.length === 0) {
return false; // Stop polling if no tasks
}
const hasActiveTasks = data.some(
(task: Task) =>
task.status === "pending" ||
task.status === "running" ||
task.status === "processing"
);
return hasActiveTasks ? 3000 : false; // Poll every 3 seconds if active tasks exist
},
refetchIntervalInBackground: true,
staleTime: 0, // Always consider data stale to ensure fresh updates
gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
...options,
},
queryClient,
);
return queryResult;
};

View file

@ -1,179 +1,204 @@
"use client"; "use client";
import { ArrowLeft, Check, Copy, Loader2, Search } from "lucide-react"; import { ArrowLeft, Check, Copy, Loader2, Search, X } from "lucide-react";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation"; import { useRouter, useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
// import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { ProtectedRoute } from "@/components/protected-route"; import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useLayout } from "@/contexts/layout-context"; import { useLayout } from "@/contexts/layout-context";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
import { import {
type ChunkResult, type ChunkResult,
type File, type File,
useGetSearchQuery, useGetSearchQuery,
} from "../../api/queries/useGetSearchQuery"; } from "../../api/queries/useGetSearchQuery";
import { Label } from "@/components/ui/label";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
const getFileTypeLabel = (mimetype: string) => { const getFileTypeLabel = (mimetype: string) => {
if (mimetype === "application/pdf") return "PDF"; if (mimetype === "application/pdf") return "PDF";
if (mimetype === "text/plain") return "Text"; if (mimetype === "text/plain") return "Text";
if (mimetype === "application/msword") return "Word Document"; if (mimetype === "application/msword") return "Word Document";
return "Unknown"; return "Unknown";
}; };
function ChunksPageContent() { function ChunksPageContent() {
const router = useRouter(); const router = useRouter();
const searchParams = useSearchParams(); const searchParams = useSearchParams();
const { isMenuOpen } = useTask(); const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
const { totalTopOffset } = useLayout(); useKnowledgeFilter();
const { parsedFilterData, isPanelOpen } = useKnowledgeFilter(); const { isMenuOpen } = useTask();
const { totalTopOffset } = useLayout();
const filename = searchParams.get("filename"); const filename = searchParams.get("filename");
const [chunks, setChunks] = useState<ChunkResult[]>([]); const [chunks, setChunks] = useState<ChunkResult[]>([]);
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
ChunkResult[] ChunkResult[]
>([]); >([]);
const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set()); const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState< const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
number | null number | null
>(null); >(null);
// Calculate average chunk length // Calculate average chunk length
const averageChunkLength = useMemo( const averageChunkLength = useMemo(
() => () =>
chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
chunks.length || 0, chunks.length || 0,
[chunks] [chunks],
); );
const [selectAll, setSelectAll] = useState(false); const [selectAll, setSelectAll] = useState(false);
const [queryInputText, setQueryInputText] = useState( const [queryInputText, setQueryInputText] = useState(
parsedFilterData?.query ?? "" parsedFilterData?.query ?? "",
); );
// Use the same search query as the knowledge page, but we'll filter for the specific file // Use the same search query as the knowledge page, but we'll filter for the specific file
const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
useEffect(() => { useEffect(() => {
if (queryInputText === "") { if (queryInputText === "") {
setChunksFilteredByQuery(chunks); setChunksFilteredByQuery(chunks);
} else { } else {
setChunksFilteredByQuery( setChunksFilteredByQuery(
chunks.filter((chunk) => chunks.filter((chunk) =>
chunk.text.toLowerCase().includes(queryInputText.toLowerCase()) chunk.text.toLowerCase().includes(queryInputText.toLowerCase()),
) ),
); );
} }
}, [queryInputText, chunks]); }, [queryInputText, chunks]);
const handleCopy = useCallback((text: string, index: number) => { const handleCopy = useCallback((text: string, index: number) => {
// Trim whitespace and remove new lines/tabs for cleaner copy // Trim whitespace and remove new lines/tabs for cleaner copy
navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, "")); navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, ""));
setActiveCopiedChunkIndex(index); setActiveCopiedChunkIndex(index);
setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds
}, []); }, []);
const fileData = (data as File[]).find( const fileData = (data as File[]).find(
(file: File) => file.filename === filename (file: File) => file.filename === filename,
); );
// Extract chunks for the specific file // Extract chunks for the specific file
useEffect(() => { useEffect(() => {
if (!filename || !(data as File[]).length) { if (!filename || !(data as File[]).length) {
setChunks([]); setChunks([]);
return; return;
} }
setChunks(fileData?.chunks || []); setChunks(
}, [data, filename]); fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || [],
);
}, [data, filename]);
// Set selected state for all checkboxes when selectAll changes // Set selected state for all checkboxes when selectAll changes
useEffect(() => { useEffect(() => {
if (selectAll) { if (selectAll) {
setSelectedChunks(new Set(chunks.map((_, index) => index))); setSelectedChunks(new Set(chunks.map((_, index) => index)));
} else { } else {
setSelectedChunks(new Set()); setSelectedChunks(new Set());
} }
}, [selectAll, setSelectedChunks, chunks]); }, [selectAll, setSelectedChunks, chunks]);
const handleBack = useCallback(() => { const handleBack = useCallback(() => {
router.push("/knowledge"); router.push("/knowledge");
}, [router]); }, [router]);
const handleChunkCardCheckboxChange = useCallback( // const handleChunkCardCheckboxChange = useCallback(
(index: number) => { // (index: number) => {
setSelectedChunks((prevSelected) => { // setSelectedChunks((prevSelected) => {
const newSelected = new Set(prevSelected); // const newSelected = new Set(prevSelected);
if (newSelected.has(index)) { // if (newSelected.has(index)) {
newSelected.delete(index); // newSelected.delete(index);
} else { // } else {
newSelected.add(index); // newSelected.add(index);
} // }
return newSelected; // return newSelected;
}); // });
}, // },
[setSelectedChunks] // [setSelectedChunks]
); // );
if (!filename) { if (!filename) {
return ( return (
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<Search className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50" /> <Search className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50" />
<p className="text-lg text-muted-foreground">No file specified</p> <p className="text-lg text-muted-foreground">No file specified</p>
<p className="text-sm text-muted-foreground/70 mt-2"> <p className="text-sm text-muted-foreground/70 mt-2">
Please select a file from the knowledge page Please select a file from the knowledge page
</p> </p>
</div> </div>
</div> </div>
); );
} }
return ( return (
<div <div
className={`fixed inset-0 md:left-72 flex flex-row transition-all duration-300 ${ className={`fixed inset-0 md:left-72 flex flex-row transition-all duration-300 ${
isMenuOpen && isPanelOpen isMenuOpen && isPanelOpen
? "md:right-[704px]" ? "md:right-[704px]"
: // Both open: 384px (menu) + 320px (KF panel) : // Both open: 384px (menu) + 320px (KF panel)
isMenuOpen isMenuOpen
? "md:right-96" ? "md:right-96"
: // Only menu open: 384px : // Only menu open: 384px
isPanelOpen isPanelOpen
? "md:right-80" ? "md:right-80"
: // Only KF panel open: 320px : // Only KF panel open: 320px
"md:right-6" // Neither open: 24px "md:right-6" // Neither open: 24px
}`} }`}
style={{ top: `${totalTopOffset}px` }} style={{ top: `${totalTopOffset}px` }}
> >
<div className="flex-1 flex flex-col min-h-0 px-6 py-6"> <div className="flex-1 flex flex-col min-h-0 px-6 py-6">
{/* Header */} {/* Header */}
<div className="flex flex-col mb-6"> <div className="flex flex-col mb-6">
<div className="flex flex-row items-center gap-3 mb-6"> <div className="flex flex-row items-center gap-3 mb-6">
<Button variant="ghost" onClick={handleBack} size="sm"> <Button variant="ghost" onClick={handleBack} size="sm">
<ArrowLeft size={24} /> <ArrowLeft size={24} />
</Button> </Button>
<h1 className="text-lg font-semibold"> <h1 className="text-lg font-semibold">
{/* Removes file extension from filename */} {/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")} {filename.replace(/\.[^/.]+$/, "")}
</h1> </h1>
</div> </div>
<div className="flex flex-col items-start mt-2"> <div className="flex flex-col items-start mt-2">
<div className="flex-1 flex items-center gap-2 w-full max-w-[616px] mb-8"> <div className="flex-1 flex items-center gap-2 w-full max-w-[640px]">
<Input <div className="primary-input min-h-10 !flex items-center flex-nowrap focus-within:border-foreground transition-colors !p-[0.3rem]">
name="search-query" {selectedFilter?.name && (
icon={!queryInputText.length ? <Search size={18} /> : null} <div
id="search-query" className={`flex items-center gap-1 h-full px-1.5 py-0.5 mr-1 rounded max-w-[25%] ${
type="text" filterAccentClasses[parsedFilterData?.color || "zinc"]
defaultValue={parsedFilterData?.query} }`}
value={queryInputText} >
onChange={(e) => setQueryInputText(e.target.value)} <span className="truncate">{selectedFilter?.name}</span>
placeholder="Search chunks..." <X
/> aria-label="Remove filter"
</div> className="h-4 w-4 flex-shrink-0 cursor-pointer"
<div className="flex items-center pl-4 gap-2"> onClick={() => setSelectedFilter(null)}
/>
</div>
)}
<Search
className="h-4 w-4 ml-1 flex-shrink-0 text-placeholder-foreground"
strokeWidth={1.5}
/>
<input
className="bg-transparent w-full h-full ml-2 focus:outline-none focus-visible:outline-none font-mono placeholder:font-mono"
name="search-query"
id="search-query"
type="text"
placeholder="Enter your search query..."
onChange={(e) => setQueryInputText(e.target.value)}
value={queryInputText}
/>
</div>
</div>
{/* <div className="flex items-center pl-4 gap-2">
<Checkbox <Checkbox
id="selectAllChunks" id="selectAllChunks"
checked={selectAll} checked={selectAll}
@ -187,106 +212,114 @@ function ChunksPageContent() {
> >
Select all Select all
</Label> </Label>
</div> </div> */}
</div> </div>
</div> </div>
{/* Content Area - matches knowledge page structure */} {/* Content Area - matches knowledge page structure */}
<div className="flex-1 overflow-scroll pr-6"> <div className="flex-1 overflow-scroll pr-6">
{isFetching ? ( {isFetching ? (
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" /> <Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" />
<p className="text-lg text-muted-foreground"> <p className="text-lg text-muted-foreground">
Loading chunks... Loading chunks...
</p> </p>
</div> </div>
</div> </div>
) : chunks.length === 0 ? ( ) : chunks.length === 0 ? (
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<Search className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50" /> <p className="text-xl font-semibold mb-2">No knowledge</p>
<p className="text-lg text-muted-foreground">No chunks found</p> <p className="text-sm text-secondary-foreground">
<p className="text-sm text-muted-foreground/70 mt-2"> Clear the knowledge filter or return to the knowledge page
This file may not have been indexed yet </p>
</p> </div>
</div> </div>
</div> ) : (
) : ( <div className="space-y-4 pb-6">
<div className="space-y-4 pb-6"> {chunksFilteredByQuery.map((chunk, index) => (
{chunksFilteredByQuery.map((chunk, index) => ( <div
<div key={chunk.filename + index}
key={chunk.filename + index} className="bg-muted rounded-lg p-4 border border-border/50"
className="bg-muted rounded-lg p-4 border border-border/50" >
> <div className="flex items-center justify-between mb-2">
<div className="flex items-center justify-between mb-2"> <div className="flex items-center gap-3">
<div className="flex items-center gap-3"> {/* <div>
<div>
<Checkbox <Checkbox
checked={selectedChunks.has(index)} checked={selectedChunks.has(index)}
onCheckedChange={() => onCheckedChange={() =>
handleChunkCardCheckboxChange(index) handleChunkCardCheckboxChange(index)
} }
/> />
</div> </div> */}
<span className="text-sm font-bold"> <span className="text-sm font-bold">
Chunk {chunk.page} Chunk {chunk.index}
</span> </span>
<span className="bg-background p-1 rounded text-xs text-muted-foreground/70"> <span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
{chunk.text.length} chars {chunk.text.length} chars
</span> </span>
<div className="py-1"> <div className="py-1">
<Button <Button
onClick={() => handleCopy(chunk.text, index)} onClick={() => handleCopy(chunk.text, index)}
variant="ghost" variant="ghost"
size="sm" size="sm"
> >
{activeCopiedChunkIndex === index ? ( {activeCopiedChunkIndex === index ? (
<Check className="text-muted-foreground" /> <Check className="text-muted-foreground" />
) : ( ) : (
<Copy className="text-muted-foreground" /> <Copy className="text-muted-foreground" />
)} )}
</Button> </Button>
</div> </div>
</div> </div>
{/* TODO: Update to use active toggle */} <span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
{/* <span className="px-2 py-1 text-green-500"> {chunk.score.toFixed(2)} score
</span>
{/* TODO: Update to use active toggle */}
{/* <span className="px-2 py-1 text-green-500">
<Switch <Switch
className="ml-2 bg-green-500" className="ml-2 bg-green-500"
checked={true} checked={true}
/> />
Active Active
</span> */} </span> */}
</div> </div>
<blockquote className="text-sm text-muted-foreground leading-relaxed border-l-2 border-input ml-1.5 pl-4"> <blockquote className="text-sm text-muted-foreground leading-relaxed ml-1.5">
{chunk.text} {chunk.text}
</blockquote> </blockquote>
</div> </div>
))} ))}
</div> </div>
)} )}
</div> </div>
</div> </div>
{/* Right panel - Summary (TODO), Technical details, */} {/* Right panel - Summary (TODO), Technical details, */}
<div className="w-[320px] py-20 px-2"> {chunks.length > 0 && (
<div className="mb-8"> <div className="w-[320px] py-20 px-2">
<h2 className="text-xl font-semibold mt-3 mb-4">Technical details</h2> <div className="mb-8">
<dl> <h2 className="text-xl font-semibold mt-3 mb-4">
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> Technical details
<dt className="text-sm/6 text-muted-foreground">Total chunks</dt> </h2>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dl>
{chunks.length} <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
</dd> <dt className="text-sm/6 text-muted-foreground">
</div> Total chunks
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> </dt>
<dt className="text-sm/6 text-muted-foreground">Avg length</dt> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> {chunks.length}
{averageChunkLength.toFixed(0)} chars </dd>
</dd> </div>
</div> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
{/* TODO: Uncomment after data is available */} <dt className="text-sm/6 text-muted-foreground">Avg length</dt>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars
</dd>
</div>
{/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt> <dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd> </dd>
@ -296,76 +329,79 @@ function ChunksPageContent() {
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd> </dd>
</div> */} </div> */}
</dl> </dl>
</div> </div>
<div className="mb-8"> <div className="mb-8">
<h2 className="text-xl font-semibold mt-2 mb-3">Original document</h2> <h2 className="text-xl font-semibold mt-2 mb-3">
<dl> Original document
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> </h2>
<dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt> <dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename} {fileData?.filename}
</dd> </dd>
</div> </div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt> <dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"} {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd> </dd>
</div> </div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt> <dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size {fileData?.size
? `${Math.round(fileData.size / 1024)} KB` ? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"} : "Unknown"}
</dd> </dd>
</div> </div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt> <dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A N/A
</dd> </dd>
</div> </div> */}
{/* TODO: Uncomment after data is available */} {/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt> <dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */} </div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt> <dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A N/A
</dd> </dd>
</div> </div> */}
</dl> </dl>
</div> </div>
</div> </div>
</div> )}
); </div>
);
} }
function ChunksPage() { function ChunksPage() {
return ( return (
<Suspense <Suspense
fallback={ fallback={
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" /> <Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" />
<p className="text-lg text-muted-foreground">Loading...</p> <p className="text-lg text-muted-foreground">Loading...</p>
</div> </div>
</div> </div>
} }
> >
<ChunksPageContent /> <ChunksPageContent />
</Suspense> </Suspense>
); );
} }
export default function ProtectedChunksPage() { export default function ProtectedChunksPage() {
return ( return (
<ProtectedRoute> <ProtectedRoute>
<ChunksPage /> <ChunksPage />
</ProtectedRoute> </ProtectedRoute>
); );
} }

View file

@ -1,10 +1,24 @@
"use client"; "use client";
import type { ColDef } from "ag-grid-community"; import type { ColDef, GetRowIdParams } from "ag-grid-community";
import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react"; import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react"; import {
Building2,
Cloud,
Globe,
HardDrive,
Search,
Trash2,
X,
} from "lucide-react";
import { useRouter } from "next/navigation"; import { useRouter } from "next/navigation";
import { type ChangeEvent, useCallback, useRef, useState } from "react"; import {
type ChangeEvent,
useCallback,
useEffect,
useRef,
useState,
} from "react";
import { SiGoogledrive } from "react-icons/si"; import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb"; import { TbBrandOnedrive } from "react-icons/tb";
import { KnowledgeDropdown } from "@/components/knowledge-dropdown"; import { KnowledgeDropdown } from "@/components/knowledge-dropdown";
@ -18,14 +32,16 @@ import "@/components/AgGrid/registerAgGridModules";
import "@/components/AgGrid/agGridStyles.css"; import "@/components/AgGrid/agGridStyles.css";
import { toast } from "sonner"; import { toast } from "sonner";
import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown"; import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { StatusBadge } from "@/components/ui/status-badge"; import { StatusBadge } from "@/components/ui/status-badge";
import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog"; import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog";
import { useDeleteDocument } from "../api/mutations/useDeleteDocument"; import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
// Function to get the appropriate icon for a connector type // Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) { function getSourceIcon(connectorType?: string) {
switch (connectorType) { switch (connectorType) {
case "url":
return <Globe className="h-4 w-4 text-muted-foreground flex-shrink-0" />;
case "google_drive": case "google_drive":
return ( return (
<SiGoogledrive className="h-4 w-4 text-foreground flex-shrink-0" /> <SiGoogledrive className="h-4 w-4 text-foreground flex-shrink-0" />
@ -47,7 +63,7 @@ function getSourceIcon(connectorType?: string) {
function SearchPage() { function SearchPage() {
const router = useRouter(); const router = useRouter();
const { isMenuOpen, files: taskFiles } = useTask(); const { isMenuOpen, files: taskFiles, refreshTasks } = useTask();
const { totalTopOffset } = useLayout(); const { totalTopOffset } = useLayout();
const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
useKnowledgeFilter(); useKnowledgeFilter();
@ -56,15 +72,14 @@ function SearchPage() {
const deleteDocumentMutation = useDeleteDocument(); const deleteDocumentMutation = useDeleteDocument();
const { data = [], isFetching } = useGetSearchQuery( useEffect(() => {
refreshTasks();
}, [refreshTasks]);
const { data: searchData = [], isFetching } = useGetSearchQuery(
parsedFilterData?.query || "*", parsedFilterData?.query || "*",
parsedFilterData parsedFilterData,
); );
const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
};
// Convert TaskFiles to File format and merge with backend results // Convert TaskFiles to File format and merge with backend results
const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
return { return {
@ -77,13 +92,32 @@ function SearchPage() {
}; };
}); });
const backendFiles = data as File[]; // Create a map of task files by filename for quick lookup
const taskFileMap = new Map(
taskFilesAsFiles.map((file) => [file.filename, file]),
);
// Override backend files with task file status if they exist
const backendFiles = (searchData as File[])
.map((file) => {
const taskFile = taskFileMap.get(file.filename);
if (taskFile) {
// Override backend file with task file data (includes status)
return { ...file, ...taskFile };
}
return file;
})
.filter((file) => {
// Only filter out files that are currently processing AND in taskFiles
const taskFile = taskFileMap.get(file.filename);
return !taskFile || taskFile.status !== "processing";
});
const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
return ( return (
taskFile.status !== "active" && taskFile.status !== "active" &&
!backendFiles.some( !backendFiles.some(
(backendFile) => backendFile.filename === taskFile.filename (backendFile) => backendFile.filename === taskFile.filename,
) )
); );
}); });
@ -91,41 +125,60 @@ function SearchPage() {
// Combine task files first, then backend files // Combine task files first, then backend files
const fileResults = [...backendFiles, ...filteredTaskFiles]; const fileResults = [...backendFiles, ...filteredTaskFiles];
const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
};
const gridRef = useRef<AgGridReact>(null); const gridRef = useRef<AgGridReact>(null);
const [columnDefs] = useState<ColDef<File>[]>([ const columnDefs = [
{ {
field: "filename", field: "filename",
headerName: "Source", headerName: "Source",
checkboxSelection: true, checkboxSelection: (params: CustomCellRendererProps<File>) =>
(params?.data?.status || "active") === "active",
headerCheckboxSelection: true, headerCheckboxSelection: true,
initialFlex: 2, initialFlex: 2,
minWidth: 220, minWidth: 220,
cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => { cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => {
// Read status directly from data on each render
const status = data?.status || "active";
const isActive = status === "active";
console.log(data?.filename, status, "a");
return ( return (
<button <div className="flex items-center overflow-hidden w-full">
type="button" <div
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left w-full" className={`transition-opacity duration-200 ${
onClick={() => { isActive ? "w-0" : "w-7"
router.push( }`}
`/knowledge/chunks?filename=${encodeURIComponent( ></div>
data?.filename ?? "" <button
)}` type="button"
); className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left flex-1 overflow-hidden"
}} onClick={() => {
> if (!isActive) {
{getSourceIcon(data?.connector_type)} return;
<span className="font-medium text-foreground truncate"> }
{value} router.push(
</span> `/knowledge/chunks?filename=${encodeURIComponent(
</button> data?.filename ?? "",
)}`,
);
}}
>
{getSourceIcon(data?.connector_type)}
<span className="font-medium text-foreground truncate">
{value}
</span>
</button>
</div>
); );
}, },
}, },
{ {
field: "size", field: "size",
headerName: "Size", headerName: "Size",
valueFormatter: (params) => valueFormatter: (params: CustomCellRendererProps<File>) =>
params.value ? `${Math.round(params.value / 1024)} KB` : "-", params.value ? `${Math.round(params.value / 1024)} KB` : "-",
}, },
{ {
@ -135,13 +188,14 @@ function SearchPage() {
{ {
field: "owner", field: "owner",
headerName: "Owner", headerName: "Owner",
valueFormatter: (params) => valueFormatter: (params: CustomCellRendererProps<File>) =>
params.data?.owner_name || params.data?.owner_email || "—", params.data?.owner_name || params.data?.owner_email || "—",
}, },
{ {
field: "chunkCount", field: "chunkCount",
headerName: "Chunks", headerName: "Chunks",
valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", valueFormatter: (params: CustomCellRendererProps<File>) =>
params.data?.chunkCount?.toString() || "-",
}, },
{ {
field: "avgScore", field: "avgScore",
@ -159,6 +213,7 @@ function SearchPage() {
field: "status", field: "status",
headerName: "Status", headerName: "Status",
cellRenderer: ({ data }: CustomCellRendererProps<File>) => { cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
console.log(data?.filename, data?.status, "b");
// Default to 'active' status if no status is provided // Default to 'active' status if no status is provided
const status = data?.status || "active"; const status = data?.status || "active";
return <StatusBadge status={status} />; return <StatusBadge status={status} />;
@ -166,6 +221,10 @@ function SearchPage() {
}, },
{ {
cellRenderer: ({ data }: CustomCellRendererProps<File>) => { cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
const status = data?.status || "active";
if (status !== "active") {
return null;
}
return <KnowledgeActionsDropdown filename={data?.filename || ""} />; return <KnowledgeActionsDropdown filename={data?.filename || ""} />;
}, },
cellStyle: { cellStyle: {
@ -182,7 +241,7 @@ function SearchPage() {
sortable: false, sortable: false,
initialFlex: 0, initialFlex: 0,
}, },
]); ];
const defaultColDef: ColDef<File> = { const defaultColDef: ColDef<File> = {
resizable: false, resizable: false,
@ -204,7 +263,7 @@ function SearchPage() {
try { try {
// Delete each file individually since the API expects one filename at a time // Delete each file individually since the API expects one filename at a time
const deletePromises = selectedRows.map((row) => const deletePromises = selectedRows.map((row) =>
deleteDocumentMutation.mutateAsync({ filename: row.filename }) deleteDocumentMutation.mutateAsync({ filename: row.filename }),
); );
await Promise.all(deletePromises); await Promise.all(deletePromises);
@ -212,7 +271,7 @@ function SearchPage() {
toast.success( toast.success(
`Successfully deleted ${selectedRows.length} document${ `Successfully deleted ${selectedRows.length} document${
selectedRows.length > 1 ? "s" : "" selectedRows.length > 1 ? "s" : ""
}` }`,
); );
setSelectedRows([]); setSelectedRows([]);
setShowBulkDeleteDialog(false); setShowBulkDeleteDialog(false);
@ -225,7 +284,7 @@ function SearchPage() {
toast.error( toast.error(
error instanceof Error error instanceof Error
? error.message ? error.message
: "Failed to delete some documents" : "Failed to delete some documents",
); );
} }
}; };
@ -270,16 +329,13 @@ function SearchPage() {
/> />
</div> </div>
)} )}
<Search <Search className="h-4 w-4 ml-1 flex-shrink-0 text-placeholder-foreground" />
className="h-4 w-4 ml-1 flex-shrink-0 text-placeholder-foreground"
strokeWidth={1.5}
/>
<input <input
className="bg-transparent w-full h-full ml-2 focus:outline-none focus-visible:outline-none font-mono placeholder:font-mono" className="bg-transparent w-full h-full ml-2 focus:outline-none focus-visible:outline-none font-mono placeholder:font-mono"
name="search-query" name="search-query"
id="search-query" id="search-query"
type="text" type="text"
placeholder="Search your documents..." placeholder="Enter your search query..."
onChange={handleTableSearch} onChange={handleTableSearch}
/> />
</div> </div>
@ -317,7 +373,7 @@ function SearchPage() {
</div> </div>
<AgGridReact <AgGridReact
className="w-full overflow-auto" className="w-full overflow-auto"
columnDefs={columnDefs} columnDefs={columnDefs as ColDef<File>[]}
defaultColDef={defaultColDef} defaultColDef={defaultColDef}
loading={isFetching} loading={isFetching}
ref={gridRef} ref={gridRef}
@ -325,7 +381,7 @@ function SearchPage() {
rowSelection="multiple" rowSelection="multiple"
rowMultiSelectWithClick={false} rowMultiSelectWithClick={false}
suppressRowClickSelection={true} suppressRowClickSelection={true}
getRowId={(params) => params.data.filename} getRowId={(params: GetRowIdParams<File>) => params.data?.filename}
domLayout="normal" domLayout="normal"
onSelectionChanged={onSelectionChanged} onSelectionChanged={onSelectionChanged}
noRowsOverlayComponent={() => ( noRowsOverlayComponent={() => (

View file

@ -633,30 +633,54 @@ function KnowledgeSourcesPage() {
</div> </div>
{/* Conditional Sync Settings or No-Auth Message */} {/* Conditional Sync Settings or No-Auth Message */}
{ {
isNoAuthMode ? ( isNoAuthMode ? (
<Card className="border-yellow-500/50 bg-yellow-500/5"> <Card className="border-yellow-500">
<CardHeader> <CardHeader>
<CardTitle className="text-lg text-yellow-600"> <CardTitle className="text-lg">
Cloud connectors are only available with auth mode enabled Cloud connectors require authentication
</CardTitle> </CardTitle>
<CardDescription className="text-sm"> <CardDescription className="text-sm">
Please provide the following environment variables and Add the Google OAuth variables below to your <code>.env</code>{" "}
restart: then restart the OpenRAG containers.
</CardDescription> </CardDescription>
</CardHeader> </CardHeader>
<CardContent> <CardContent>
<div className="bg-muted rounded-md p-4 font-mono text-sm"> <div className="bg-muted rounded-md p-4 font-mono text-sm">
<div className="text-muted-foreground mb-2"> <div className="text-muted-foreground">
# make here <div>
https://console.cloud.google.com/apis/credentials <span className="mr-3 text-placeholder-foreground">
</div> 27
<div>GOOGLE_OAUTH_CLIENT_ID=</div> </span>
<div>GOOGLE_OAUTH_CLIENT_SECRET=</div> <span># Google OAuth</span>
</div> </div>
</CardContent> <div>
</Card> <span className="mr-3 text-placeholder-foreground">
) : null 28
</span>
<span># Create credentials here:</span>
</div>
<div>
<span className="mr-3 text-placeholder-foreground">
29
</span>
<span>
# https://console.cloud.google.com/apis/credentials
</span>
</div>
</div>
<div>
<span className="mr-3 text-placeholder-foreground">30</span>
<span>GOOGLE_OAUTH_CLIENT_ID=</span>
</div>
<div>
<span className="mr-3 text-placeholder-foreground">31</span>
<span>GOOGLE_OAUTH_CLIENT_SECRET=</span>
</div>
</div>
</CardContent>
</Card>
) : null
// <div className="flex items-center justify-between py-4"> // <div className="flex items-center justify-between py-4">
// <div> // <div>
// <h3 className="text-lg font-medium">Sync Settings</h3> // <h3 className="text-lg font-medium">Sync Settings</h3>

View file

@ -1,6 +1,6 @@
"use client" "use client"
import { useState } from 'react' import { useEffect, useState } from 'react'
import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react' import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
@ -8,9 +8,16 @@ import { Badge } from '@/components/ui/badge'
import { useTask, Task } from '@/contexts/task-context' import { useTask, Task } from '@/contexts/task-context'
export function TaskNotificationMenu() { export function TaskNotificationMenu() {
const { tasks, isFetching, isMenuOpen, cancelTask } = useTask() const { tasks, isFetching, isMenuOpen, isRecentTasksExpanded, cancelTask } = useTask()
const [isExpanded, setIsExpanded] = useState(false) const [isExpanded, setIsExpanded] = useState(false)
// Sync local state with context state
useEffect(() => {
if (isRecentTasksExpanded) {
setIsExpanded(true)
}
}, [isRecentTasksExpanded])
// Don't render if menu is closed // Don't render if menu is closed
if (!isMenuOpen) return null if (!isMenuOpen) return null

View file

@ -1,26 +1,16 @@
interface AnimatedProcessingIconProps { import type { SVGProps } from "react";
className?: string;
size?: number;
}
export const AnimatedProcessingIcon = ({ export const AnimatedProcessingIcon = (props: SVGProps<SVGSVGElement>) => {
className = "", return (
size = 10, <svg
}: AnimatedProcessingIconProps) => { viewBox="0 0 8 12"
const width = Math.round((size * 6) / 10); fill="none"
const height = size; xmlns="http://www.w3.org/2000/svg"
{...props}
return ( >
<svg <title>Processing</title>
width={width} <style>
height={height} {`
viewBox="0 0 6 10"
fill="none"
xmlns="http://www.w3.org/2000/svg"
className={className}
>
<style>
{`
.dot-1 { animation: pulse-wave 1.5s infinite; animation-delay: 0s; } .dot-1 { animation: pulse-wave 1.5s infinite; animation-delay: 0s; }
.dot-2 { animation: pulse-wave 1.5s infinite; animation-delay: 0.1s; } .dot-2 { animation: pulse-wave 1.5s infinite; animation-delay: 0.1s; }
.dot-3 { animation: pulse-wave 1.5s infinite; animation-delay: 0.2s; } .dot-3 { animation: pulse-wave 1.5s infinite; animation-delay: 0.2s; }
@ -30,20 +20,18 @@ export const AnimatedProcessingIcon = ({
@keyframes pulse-wave { @keyframes pulse-wave {
0%, 60%, 100% { 0%, 60%, 100% {
opacity: 0.25; opacity: 0.25;
transform: scale(1);
} }
30% { 30% {
opacity: 1; opacity: 1;
transform: scale(1.2);
} }
} }
`} `}
</style> </style>
<circle className="dot-1" cx="1" cy="5" r="1" fill="currentColor" /> <circle className="dot-1" cx="2" cy="6" r="1" fill="currentColor" />
<circle className="dot-2" cx="1" cy="9" r="1" fill="currentColor" /> <circle className="dot-2" cx="2" cy="10" r="1" fill="currentColor" />
<circle className="dot-3" cx="5" cy="1" r="1" fill="currentColor" /> <circle className="dot-3" cx="6" cy="2" r="1" fill="currentColor" />
<circle className="dot-4" cx="5" cy="5" r="1" fill="currentColor" /> <circle className="dot-4" cx="6" cy="6" r="1" fill="currentColor" />
<circle className="dot-5" cx="5" cy="9" r="1" fill="currentColor" /> <circle className="dot-5" cx="6" cy="10" r="1" fill="currentColor" />
</svg> </svg>
); );
}; };

View file

@ -50,7 +50,7 @@ export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
}`} }`}
> >
{status === "processing" && ( {status === "processing" && (
<AnimatedProcessingIcon className="text-current mr-2" size={10} /> <AnimatedProcessingIcon className="text-current h-3 w-3 shrink-0" />
)} )}
{config.label} {config.label}
</div> </div>

View file

@ -7,33 +7,18 @@ import {
useCallback, useCallback,
useContext, useContext,
useEffect, useEffect,
useRef,
useState, useState,
} from "react"; } from "react";
import { toast } from "sonner"; import { toast } from "sonner";
import { useCancelTaskMutation } from "@/app/api/mutations/useCancelTaskMutation";
import {
type Task,
useGetTasksQuery,
} from "@/app/api/queries/useGetTasksQuery";
import { useAuth } from "@/contexts/auth-context"; import { useAuth } from "@/contexts/auth-context";
export interface Task { // Task interface is now imported from useGetTasksQuery
task_id: string;
status:
| "pending"
| "running"
| "processing"
| "completed"
| "failed"
| "error";
total_files?: number;
processed_files?: number;
successful_files?: number;
failed_files?: number;
running_files?: number;
pending_files?: number;
created_at: string;
updated_at: string;
duration_seconds?: number;
result?: Record<string, unknown>;
error?: string;
files?: Record<string, Record<string, unknown>>;
}
export interface TaskFile { export interface TaskFile {
filename: string; filename: string;
@ -51,27 +36,54 @@ interface TaskContextType {
files: TaskFile[]; files: TaskFile[];
addTask: (taskId: string) => void; addTask: (taskId: string) => void;
addFiles: (files: Partial<TaskFile>[], taskId: string) => void; addFiles: (files: Partial<TaskFile>[], taskId: string) => void;
removeTask: (taskId: string) => void;
refreshTasks: () => Promise<void>; refreshTasks: () => Promise<void>;
cancelTask: (taskId: string) => Promise<void>; cancelTask: (taskId: string) => Promise<void>;
isPolling: boolean; isPolling: boolean;
isFetching: boolean; isFetching: boolean;
isMenuOpen: boolean; isMenuOpen: boolean;
toggleMenu: () => void; toggleMenu: () => void;
isRecentTasksExpanded: boolean;
setRecentTasksExpanded: (expanded: boolean) => void;
// React Query states
isLoading: boolean;
error: Error | null;
} }
const TaskContext = createContext<TaskContextType | undefined>(undefined); const TaskContext = createContext<TaskContextType | undefined>(undefined);
export function TaskProvider({ children }: { children: React.ReactNode }) { export function TaskProvider({ children }: { children: React.ReactNode }) {
const [tasks, setTasks] = useState<Task[]>([]);
const [files, setFiles] = useState<TaskFile[]>([]); const [files, setFiles] = useState<TaskFile[]>([]);
const [isPolling, setIsPolling] = useState(false);
const [isFetching, setIsFetching] = useState(false);
const [isMenuOpen, setIsMenuOpen] = useState(false); const [isMenuOpen, setIsMenuOpen] = useState(false);
const [isRecentTasksExpanded, setIsRecentTasksExpanded] = useState(false);
const previousTasksRef = useRef<Task[]>([]);
const { isAuthenticated, isNoAuthMode } = useAuth(); const { isAuthenticated, isNoAuthMode } = useAuth();
const queryClient = useQueryClient(); const queryClient = useQueryClient();
// Use React Query hooks
const {
data: tasks = [],
isLoading,
error,
refetch: refetchTasks,
isFetching,
} = useGetTasksQuery({
enabled: isAuthenticated || isNoAuthMode,
});
const cancelTaskMutation = useCancelTaskMutation({
onSuccess: () => {
toast.success("Task cancelled", {
description: "Task has been cancelled successfully",
});
},
onError: (error) => {
toast.error("Failed to cancel task", {
description: error.message,
});
},
});
const refetchSearch = useCallback(() => { const refetchSearch = useCallback(() => {
queryClient.invalidateQueries({ queryClient.invalidateQueries({
queryKey: ["search"], queryKey: ["search"],
@ -99,265 +111,216 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
[], [],
); );
const fetchTasks = useCallback(async () => { // Handle task status changes and file updates
if (!isAuthenticated && !isNoAuthMode) return; useEffect(() => {
if (tasks.length === 0) {
setIsFetching(true); // Store current tasks as previous for next comparison
try { previousTasksRef.current = tasks;
const response = await fetch("/api/tasks"); return;
if (response.ok) {
const data = await response.json();
const newTasks = data.tasks || [];
// Update tasks and check for status changes in the same state update
setTasks((prevTasks) => {
// Check for newly completed tasks to show toasts
if (prevTasks.length > 0) {
newTasks.forEach((newTask: Task) => {
const oldTask = prevTasks.find(
(t) => t.task_id === newTask.task_id,
);
// Update or add files from task.files if available
if (newTask.files && typeof newTask.files === "object") {
const taskFileEntries = Object.entries(newTask.files);
const now = new Date().toISOString();
taskFileEntries.forEach(([filePath, fileInfo]) => {
if (typeof fileInfo === "object" && fileInfo) {
const fileName = filePath.split("/").pop() || filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
let mappedStatus: TaskFile["status"];
switch (fileStatus) {
case "pending":
case "running":
mappedStatus = "processing";
break;
case "completed":
mappedStatus = "active";
break;
case "failed":
mappedStatus = "failed";
break;
default:
mappedStatus = "processing";
}
setFiles((prevFiles) => {
const existingFileIndex = prevFiles.findIndex(
(f) =>
f.source_url === filePath &&
f.task_id === newTask.task_id,
);
// Detect connector type based on file path or other indicators
let connectorType = "local";
if (filePath.includes("/") && !filePath.startsWith("/")) {
// Likely S3 key format (bucket/path/file.ext)
connectorType = "s3";
}
const fileEntry: TaskFile = {
filename: fileName,
mimetype: "", // We don't have this info from the task
source_url: filePath,
size: 0, // We don't have this info from the task
connector_type: connectorType,
status: mappedStatus,
task_id: newTask.task_id,
created_at:
typeof fileInfo.created_at === "string"
? fileInfo.created_at
: now,
updated_at:
typeof fileInfo.updated_at === "string"
? fileInfo.updated_at
: now,
};
if (existingFileIndex >= 0) {
// Update existing file
const updatedFiles = [...prevFiles];
updatedFiles[existingFileIndex] = fileEntry;
return updatedFiles;
} else {
// Add new file
return [...prevFiles, fileEntry];
}
});
}
});
}
if (
oldTask &&
oldTask.status !== "completed" &&
newTask.status === "completed"
) {
// Task just completed - show success toast
toast.success("Task completed successfully", {
description: `Task ${newTask.task_id} has finished processing.`,
action: {
label: "View",
onClick: () => console.log("View task", newTask.task_id),
},
});
refetchSearch();
// Dispatch knowledge updated event for all knowledge-related pages
console.log(
"Task completed successfully, dispatching knowledgeUpdated event",
);
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
// Remove files for this completed task from the files list
setFiles((prevFiles) =>
prevFiles.filter((file) => file.task_id !== newTask.task_id),
);
} else if (
oldTask &&
oldTask.status !== "failed" &&
oldTask.status !== "error" &&
(newTask.status === "failed" || newTask.status === "error")
) {
// Task just failed - show error toast
toast.error("Task failed", {
description: `Task ${newTask.task_id} failed: ${
newTask.error || "Unknown error"
}`,
});
// Files will be updated to failed status by the file parsing logic above
}
});
}
return newTasks;
});
}
} catch (error) {
console.error("Failed to fetch tasks:", error);
} finally {
setIsFetching(false);
} }
}, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop!
const addTask = useCallback((taskId: string) => { // Check for task status changes by comparing with previous tasks
// Immediately start aggressive polling for the new task tasks.forEach((currentTask) => {
let pollAttempts = 0; const previousTask = previousTasksRef.current.find(
const maxPollAttempts = 30; // Poll for up to 30 seconds (prev) => prev.task_id === currentTask.task_id,
);
const aggressivePoll = async () => { // Only show toasts if we have previous data and status has changed
try { if (
const response = await fetch("/api/tasks"); (previousTask && previousTask.status !== currentTask.status) ||
if (response.ok) { (!previousTask && previousTasksRef.current.length !== 0)
const data = await response.json(); ) {
const newTasks = data.tasks || []; // Process files from failed task and add them to files list
const foundTask = newTasks.find( if (currentTask.files && typeof currentTask.files === "object") {
(task: Task) => task.task_id === taskId, const taskFileEntries = Object.entries(currentTask.files);
); const now = new Date().toISOString();
if (foundTask) { taskFileEntries.forEach(([filePath, fileInfo]) => {
// Task found! Update the tasks state if (typeof fileInfo === "object" && fileInfo) {
setTasks((prevTasks) => { // Use the filename from backend if available, otherwise extract from path
// Check if task is already in the list const fileName =
const exists = prevTasks.some((t) => t.task_id === taskId); (fileInfo as any).filename ||
if (!exists) { filePath.split("/").pop() ||
return [...prevTasks, foundTask]; filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
let mappedStatus: TaskFile["status"];
switch (fileStatus) {
case "pending":
case "running":
mappedStatus = "processing";
break;
case "completed":
mappedStatus = "active";
break;
case "failed":
mappedStatus = "failed";
break;
default:
mappedStatus = "processing";
} }
// Update existing task
return prevTasks.map((t) => setFiles((prevFiles) => {
t.task_id === taskId ? foundTask : t, const existingFileIndex = prevFiles.findIndex(
); (f) =>
}); f.source_url === filePath &&
return; // Stop polling, we found it f.task_id === currentTask.task_id,
} );
// Detect connector type based on file path or other indicators
let connectorType = "local";
if (filePath.includes("/") && !filePath.startsWith("/")) {
// Likely S3 key format (bucket/path/file.ext)
connectorType = "s3";
}
const fileEntry: TaskFile = {
filename: fileName,
mimetype: "", // We don't have this info from the task
source_url: filePath,
size: 0, // We don't have this info from the task
connector_type: connectorType,
status: mappedStatus,
task_id: currentTask.task_id,
created_at:
typeof fileInfo.created_at === "string"
? fileInfo.created_at
: now,
updated_at:
typeof fileInfo.updated_at === "string"
? fileInfo.updated_at
: now,
};
if (existingFileIndex >= 0) {
// Update existing file
const updatedFiles = [...prevFiles];
updatedFiles[existingFileIndex] = fileEntry;
return updatedFiles;
} else {
// Add new file
return [...prevFiles, fileEntry];
}
});
}
});
} }
} catch (error) { if (
console.error("Aggressive polling failed:", error); previousTask &&
} previousTask.status !== "completed" &&
currentTask.status === "completed"
) {
// Task just completed - show success toast with file counts
const successfulFiles = currentTask.successful_files || 0;
const failedFiles = currentTask.failed_files || 0;
pollAttempts++; let description = "";
if (pollAttempts < maxPollAttempts) { if (failedFiles > 0) {
// Continue polling every 1 second for new tasks description = `${successfulFiles} file${
setTimeout(aggressivePoll, 1000); successfulFiles !== 1 ? "s" : ""
} } uploaded successfully, ${failedFiles} file${
}; failedFiles !== 1 ? "s" : ""
} failed`;
} else {
description = `${successfulFiles} file${
successfulFiles !== 1 ? "s" : ""
} uploaded successfully`;
}
// Start aggressive polling after a short delay to allow backend to process toast.success("Task completed", {
setTimeout(aggressivePoll, 500); description,
}, []); action: {
label: "View",
onClick: () => {
setIsMenuOpen(true);
setIsRecentTasksExpanded(true);
},
},
});
setTimeout(() => {
setFiles((prevFiles) =>
prevFiles.filter(
(file) =>
file.task_id !== currentTask.task_id ||
file.status === "failed",
),
);
refetchSearch();
}, 500);
} else if (
previousTask &&
previousTask.status !== "failed" &&
previousTask.status !== "error" &&
(currentTask.status === "failed" || currentTask.status === "error")
) {
// Task just failed - show error toast
toast.error("Task failed", {
description: `Task ${currentTask.task_id} failed: ${
currentTask.error || "Unknown error"
}`,
});
}
}
});
// Store current tasks as previous for next comparison
previousTasksRef.current = tasks;
}, [tasks, refetchSearch]);
const addTask = useCallback(
(_taskId: string) => {
// React Query will automatically handle polling when tasks are active
// Just trigger a refetch to get the latest data
setTimeout(() => {
refetchTasks();
}, 500);
},
[refetchTasks],
);
const refreshTasks = useCallback(async () => { const refreshTasks = useCallback(async () => {
await fetchTasks(); setFiles([]);
}, [fetchTasks]); await refetchTasks();
}, [refetchTasks]);
const removeTask = useCallback((taskId: string) => {
setTasks((prev) => prev.filter((task) => task.task_id !== taskId));
}, []);
const cancelTask = useCallback( const cancelTask = useCallback(
async (taskId: string) => { async (taskId: string) => {
try { cancelTaskMutation.mutate({ taskId });
const response = await fetch(`/api/tasks/${taskId}/cancel`, {
method: "POST",
});
if (response.ok) {
// Immediately refresh tasks to show the updated status
await fetchTasks();
toast.success("Task cancelled", {
description: `Task ${taskId.substring(0, 8)}... has been cancelled`,
});
} else {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.error || "Failed to cancel task");
}
} catch (error) {
console.error("Failed to cancel task:", error);
toast.error("Failed to cancel task", {
description: error instanceof Error ? error.message : "Unknown error",
});
}
}, },
[fetchTasks], [cancelTaskMutation],
); );
const toggleMenu = useCallback(() => { const toggleMenu = useCallback(() => {
setIsMenuOpen((prev) => !prev); setIsMenuOpen((prev) => !prev);
}, []); }, []);
// Periodic polling for task updates // Determine if we're polling based on React Query's refetch interval
useEffect(() => { const isPolling =
if (!isAuthenticated && !isNoAuthMode) return; isFetching &&
tasks.some(
setIsPolling(true); (task) =>
task.status === "pending" ||
// Initial fetch task.status === "running" ||
fetchTasks(); task.status === "processing",
);
// Set up polling interval - every 3 seconds (more responsive for active tasks)
const interval = setInterval(fetchTasks, 3000);
return () => {
clearInterval(interval);
setIsPolling(false);
};
}, [isAuthenticated, isNoAuthMode, fetchTasks]);
const value: TaskContextType = { const value: TaskContextType = {
tasks, tasks,
files, files,
addTask, addTask,
addFiles, addFiles,
removeTask,
refreshTasks, refreshTasks,
cancelTask, cancelTask,
isPolling, isPolling,
isFetching, isFetching,
isMenuOpen, isMenuOpen,
toggleMenu, toggleMenu,
isRecentTasksExpanded,
setRecentTasksExpanded: setIsRecentTasksExpanded,
isLoading,
error,
}; };
return <TaskContext.Provider value={value}>{children}</TaskContext.Provider>; return <TaskContext.Provider value={value}>{children}</TaskContext.Provider>;

View file

@ -6,14 +6,13 @@ from config.settings import INDEX_NAME
logger = get_logger(__name__) logger = get_logger(__name__)
async def delete_documents_by_filename(request: Request, document_service, session_manager): async def check_filename_exists(request: Request, document_service, session_manager):
"""Delete all documents with a specific filename""" """Check if a document with a specific filename already exists"""
data = await request.json() filename = request.query_params.get("filename")
filename = data.get("filename")
if not filename: if not filename:
return JSONResponse({"error": "filename is required"}, status_code=400) return JSONResponse({"error": "filename parameter is required"}, status_code=400)
user = request.state.user user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token) jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
@ -22,34 +21,79 @@ async def delete_documents_by_filename(request: Request, document_service, sessi
opensearch_client = session_manager.get_user_opensearch_client( opensearch_client = session_manager.get_user_opensearch_client(
user.user_id, jwt_token user.user_id, jwt_token
) )
# Search for any document with this exact filename
from utils.opensearch_queries import build_filename_search_body
search_body = build_filename_search_body(filename, size=1, source=["filename"])
logger.debug(f"Checking filename existence: {filename}")
response = await opensearch_client.search(
index=INDEX_NAME,
body=search_body
)
# Check if any hits were found
hits = response.get("hits", {}).get("hits", [])
exists = len(hits) > 0
logger.debug(f"Filename check result - exists: {exists}, hits: {len(hits)}")
return JSONResponse({
"exists": exists,
"filename": filename
}, status_code=200)
except Exception as e:
logger.error("Error checking filename existence", filename=filename, error=str(e))
error_str = str(e)
if "AuthenticationException" in error_str:
return JSONResponse({"error": "Access denied: insufficient permissions"}, status_code=403)
else:
return JSONResponse({"error": str(e)}, status_code=500)
async def delete_documents_by_filename(request: Request, document_service, session_manager):
"""Delete all documents with a specific filename"""
data = await request.json()
filename = data.get("filename")
if not filename:
return JSONResponse({"error": "filename is required"}, status_code=400)
user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
try:
# Get user's OpenSearch client
opensearch_client = session_manager.get_user_opensearch_client(
user.user_id, jwt_token
)
# Delete by query to remove all chunks of this document # Delete by query to remove all chunks of this document
delete_query = { from utils.opensearch_queries import build_filename_delete_body
"query": {
"bool": { delete_query = build_filename_delete_body(filename)
"must": [
{"term": {"filename": filename}} logger.debug(f"Deleting documents with filename: {filename}")
]
}
}
}
result = await opensearch_client.delete_by_query( result = await opensearch_client.delete_by_query(
index=INDEX_NAME, index=INDEX_NAME,
body=delete_query, body=delete_query,
conflicts="proceed" conflicts="proceed"
) )
deleted_count = result.get("deleted", 0) deleted_count = result.get("deleted", 0)
logger.info(f"Deleted {deleted_count} chunks for filename {filename}", user_id=user.user_id) logger.info(f"Deleted {deleted_count} chunks for filename {filename}", user_id=user.user_id)
return JSONResponse({ return JSONResponse({
"success": True, "success": True,
"deleted_chunks": deleted_count, "deleted_chunks": deleted_count,
"filename": filename, "filename": filename,
"message": f"All documents with filename '{filename}' deleted successfully" "message": f"All documents with filename '{filename}' deleted successfully"
}, status_code=200) }, status_code=200)
except Exception as e: except Exception as e:
logger.error("Error deleting documents by filename", filename=filename, error=str(e)) logger.error("Error deleting documents by filename", filename=filename, error=str(e))
error_str = str(e) error_str = str(e)

View file

@ -189,19 +189,20 @@ async def upload_and_ingest_user_file(
# Create temporary file for task processing # Create temporary file for task processing
import tempfile import tempfile
import os import os
# Read file content # Read file content
content = await upload_file.read() content = await upload_file.read()
# Create temporary file # Create temporary file with the actual filename (not a temp prefix)
# Store in temp directory but use the real filename
temp_dir = tempfile.gettempdir()
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
temp_fd, temp_path = tempfile.mkstemp( temp_path = os.path.join(temp_dir, safe_filename)
suffix=f"_{safe_filename}"
)
try: try:
# Write content to temp file # Write content to temp file
with os.fdopen(temp_fd, 'wb') as temp_file: with open(temp_path, 'wb') as temp_file:
temp_file.write(content) temp_file.write(content)
logger.debug("Created temporary file for task processing", temp_path=temp_path) logger.debug("Created temporary file for task processing", temp_path=temp_path)

View file

@ -13,27 +13,27 @@ logger = get_logger(__name__)
async def upload_ingest_router( async def upload_ingest_router(
request: Request, request: Request,
document_service=None, document_service=None,
langflow_file_service=None, langflow_file_service=None,
session_manager=None, session_manager=None,
task_service=None task_service=None,
): ):
""" """
Router endpoint that automatically routes upload requests based on configuration. Router endpoint that automatically routes upload requests based on configuration.
- If DISABLE_INGEST_WITH_LANGFLOW is True: uses traditional OpenRAG upload (/upload) - If DISABLE_INGEST_WITH_LANGFLOW is True: uses traditional OpenRAG upload (/upload)
- If DISABLE_INGEST_WITH_LANGFLOW is False (default): uses Langflow upload-ingest via task service - If DISABLE_INGEST_WITH_LANGFLOW is False (default): uses Langflow upload-ingest via task service
This provides a single endpoint that users can call regardless of backend configuration. This provides a single endpoint that users can call regardless of backend configuration.
All langflow uploads are processed as background tasks for better scalability. All langflow uploads are processed as background tasks for better scalability.
""" """
try: try:
logger.debug( logger.debug(
"Router upload_ingest endpoint called", "Router upload_ingest endpoint called",
disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW,
) )
# Route based on configuration # Route based on configuration
if DISABLE_INGEST_WITH_LANGFLOW: if DISABLE_INGEST_WITH_LANGFLOW:
# Route to traditional OpenRAG upload # Route to traditional OpenRAG upload
@ -42,8 +42,10 @@ async def upload_ingest_router(
else: else:
# Route to Langflow upload and ingest using task service # Route to Langflow upload and ingest using task service
logger.debug("Routing to Langflow upload-ingest pipeline via task service") logger.debug("Routing to Langflow upload-ingest pipeline via task service")
return await langflow_upload_ingest_task(request, langflow_file_service, session_manager, task_service) return await langflow_upload_ingest_task(
request, langflow_file_service, session_manager, task_service
)
except Exception as e: except Exception as e:
logger.error("Error in upload_ingest_router", error=str(e)) logger.error("Error in upload_ingest_router", error=str(e))
error_msg = str(e) error_msg = str(e)
@ -57,17 +59,14 @@ async def upload_ingest_router(
async def langflow_upload_ingest_task( async def langflow_upload_ingest_task(
request: Request, request: Request, langflow_file_service, session_manager, task_service
langflow_file_service,
session_manager,
task_service
): ):
"""Task-based langflow upload and ingest for single/multiple files""" """Task-based langflow upload and ingest for single/multiple files"""
try: try:
logger.debug("Task-based langflow upload_ingest endpoint called") logger.debug("Task-based langflow upload_ingest endpoint called")
form = await request.form() form = await request.form()
upload_files = form.getlist("file") upload_files = form.getlist("file")
if not upload_files or len(upload_files) == 0: if not upload_files or len(upload_files) == 0:
logger.error("No files provided in task-based upload request") logger.error("No files provided in task-based upload request")
return JSONResponse({"error": "Missing files"}, status_code=400) return JSONResponse({"error": "Missing files"}, status_code=400)
@ -77,14 +76,16 @@ async def langflow_upload_ingest_task(
settings_json = form.get("settings") settings_json = form.get("settings")
tweaks_json = form.get("tweaks") tweaks_json = form.get("tweaks")
delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true" delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true"
replace_duplicates = form.get("replace_duplicates", "false").lower() == "true"
# Parse JSON fields if provided # Parse JSON fields if provided
settings = None settings = None
tweaks = None tweaks = None
if settings_json: if settings_json:
try: try:
import json import json
settings = json.loads(settings_json) settings = json.loads(settings_json)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.error("Invalid settings JSON", error=str(e)) logger.error("Invalid settings JSON", error=str(e))
@ -93,6 +94,7 @@ async def langflow_upload_ingest_task(
if tweaks_json: if tweaks_json:
try: try:
import json import json
tweaks = json.loads(tweaks_json) tweaks = json.loads(tweaks_json)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.error("Invalid tweaks JSON", error=str(e)) logger.error("Invalid tweaks JSON", error=str(e))
@ -106,28 +108,37 @@ async def langflow_upload_ingest_task(
jwt_token = getattr(request.state, "jwt_token", None) jwt_token = getattr(request.state, "jwt_token", None)
if not user_id: if not user_id:
return JSONResponse({"error": "User authentication required"}, status_code=401) return JSONResponse(
{"error": "User authentication required"}, status_code=401
)
# Create temporary files for task processing # Create temporary files for task processing
import tempfile import tempfile
import os import os
temp_file_paths = [] temp_file_paths = []
original_filenames = []
try: try:
# Create temp directory reference once
temp_dir = tempfile.gettempdir()
for upload_file in upload_files: for upload_file in upload_files:
# Read file content # Read file content
content = await upload_file.read() content = await upload_file.read()
# Create temporary file # Store ORIGINAL filename (not transformed)
original_filenames.append(upload_file.filename)
# Create temporary file with TRANSFORMED filename for filesystem safety
# Transform: spaces and / to underscore
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
temp_fd, temp_path = tempfile.mkstemp( temp_path = os.path.join(temp_dir, safe_filename)
suffix=f"_{safe_filename}"
)
# Write content to temp file # Write content to temp file
with os.fdopen(temp_fd, 'wb') as temp_file: with open(temp_path, "wb") as temp_file:
temp_file.write(content) temp_file.write(content)
temp_file_paths.append(temp_path) temp_file_paths.append(temp_path)
logger.debug( logger.debug(
@ -136,21 +147,22 @@ async def langflow_upload_ingest_task(
user_id=user_id, user_id=user_id,
has_settings=bool(settings), has_settings=bool(settings),
has_tweaks=bool(tweaks), has_tweaks=bool(tweaks),
delete_after_ingest=delete_after_ingest delete_after_ingest=delete_after_ingest,
) )
# Create langflow upload task # Create langflow upload task
print(f"tweaks: {tweaks}") logger.debug(
print(f"settings: {settings}") f"Preparing to create langflow upload task: tweaks={tweaks}, settings={settings}, jwt_token={jwt_token}, user_name={user_name}, user_email={user_email}, session_id={session_id}, delete_after_ingest={delete_after_ingest}, temp_file_paths={temp_file_paths}",
print(f"jwt_token: {jwt_token}") )
print(f"user_name: {user_name}") # Create a map between temp_file_paths and original_filenames
print(f"user_email: {user_email}") file_path_to_original_filename = dict(zip(temp_file_paths, original_filenames))
print(f"session_id: {session_id}") logger.debug(
print(f"delete_after_ingest: {delete_after_ingest}") f"File path to original filename map: {file_path_to_original_filename}",
print(f"temp_file_paths: {temp_file_paths}") )
task_id = await task_service.create_langflow_upload_task( task_id = await task_service.create_langflow_upload_task(
user_id=user_id, user_id=user_id,
file_paths=temp_file_paths, file_paths=temp_file_paths,
original_filenames=file_path_to_original_filename,
langflow_file_service=langflow_file_service, langflow_file_service=langflow_file_service,
session_manager=session_manager, session_manager=session_manager,
jwt_token=jwt_token, jwt_token=jwt_token,
@ -160,23 +172,28 @@ async def langflow_upload_ingest_task(
tweaks=tweaks, tweaks=tweaks,
settings=settings, settings=settings,
delete_after_ingest=delete_after_ingest, delete_after_ingest=delete_after_ingest,
replace_duplicates=replace_duplicates,
) )
logger.debug("Langflow upload task created successfully", task_id=task_id) logger.debug("Langflow upload task created successfully", task_id=task_id)
return JSONResponse({ return JSONResponse(
"task_id": task_id, {
"message": f"Langflow upload task created for {len(upload_files)} file(s)", "task_id": task_id,
"file_count": len(upload_files) "message": f"Langflow upload task created for {len(upload_files)} file(s)",
}, status_code=202) # 202 Accepted for async processing "file_count": len(upload_files),
},
status_code=202,
) # 202 Accepted for async processing
except Exception: except Exception:
# Clean up temp files on error # Clean up temp files on error
from utils.file_utils import safe_unlink from utils.file_utils import safe_unlink
for temp_path in temp_file_paths: for temp_path in temp_file_paths:
safe_unlink(temp_path) safe_unlink(temp_path)
raise raise
except Exception as e: except Exception as e:
logger.error( logger.error(
"Task-based langflow upload_ingest endpoint failed", "Task-based langflow upload_ingest endpoint failed",
@ -184,5 +201,6 @@ async def langflow_upload_ingest_task(
error=str(e), error=str(e),
) )
import traceback import traceback
logger.error("Full traceback", traceback=traceback.format_exc()) logger.error("Full traceback", traceback=traceback.format_exc())
return JSONResponse({"error": str(e)}, status_code=500) return JSONResponse({"error": str(e)}, status_code=500)

View file

@ -953,6 +953,17 @@ async def create_app():
methods=["POST", "GET"], methods=["POST", "GET"],
), ),
# Document endpoints # Document endpoints
Route(
"/documents/check-filename",
require_auth(services["session_manager"])(
partial(
documents.check_filename_exists,
document_service=services["document_service"],
session_manager=services["session_manager"],
)
),
methods=["GET"],
),
Route( Route(
"/documents/delete-by-filename", "/documents/delete-by-filename",
require_auth(services["session_manager"])( require_auth(services["session_manager"])(

View file

@ -55,6 +55,96 @@ class TaskProcessor:
await asyncio.sleep(retry_delay) await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff retry_delay *= 2 # Exponential backoff
async def check_filename_exists(
self,
filename: str,
opensearch_client,
) -> bool:
"""
Check if a document with the given filename already exists in OpenSearch.
Returns True if any chunks with this filename exist.
"""
from config.settings import INDEX_NAME
from utils.opensearch_queries import build_filename_search_body
import asyncio
max_retries = 3
retry_delay = 1.0
for attempt in range(max_retries):
try:
# Search for any document with this exact filename
search_body = build_filename_search_body(filename, size=1, source=False)
response = await opensearch_client.search(
index=INDEX_NAME,
body=search_body
)
# Check if any hits were found
hits = response.get("hits", {}).get("hits", [])
return len(hits) > 0
except (asyncio.TimeoutError, Exception) as e:
if attempt == max_retries - 1:
logger.error(
"OpenSearch filename check failed after retries",
filename=filename,
error=str(e),
attempt=attempt + 1
)
# On final failure, assume document doesn't exist (safer to reprocess than skip)
logger.warning(
"Assuming filename doesn't exist due to connection issues",
filename=filename
)
return False
else:
logger.warning(
"OpenSearch filename check failed, retrying",
filename=filename,
error=str(e),
attempt=attempt + 1,
retry_in=retry_delay
)
await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
async def delete_document_by_filename(
self,
filename: str,
opensearch_client,
) -> None:
"""
Delete all chunks of a document with the given filename from OpenSearch.
"""
from config.settings import INDEX_NAME
from utils.opensearch_queries import build_filename_delete_body
try:
# Delete all documents with this filename
delete_body = build_filename_delete_body(filename)
response = await opensearch_client.delete_by_query(
index=INDEX_NAME,
body=delete_body
)
deleted_count = response.get("deleted", 0)
logger.info(
"Deleted existing document chunks",
filename=filename,
deleted_count=deleted_count
)
except Exception as e:
logger.error(
"Failed to delete existing document",
filename=filename,
error=str(e)
)
raise
async def process_document_standard( async def process_document_standard(
self, self,
file_path: str, file_path: str,
@ -527,6 +617,7 @@ class LangflowFileProcessor(TaskProcessor):
tweaks: dict = None, tweaks: dict = None,
settings: dict = None, settings: dict = None,
delete_after_ingest: bool = True, delete_after_ingest: bool = True,
replace_duplicates: bool = False,
): ):
super().__init__() super().__init__()
self.langflow_file_service = langflow_file_service self.langflow_file_service = langflow_file_service
@ -539,6 +630,7 @@ class LangflowFileProcessor(TaskProcessor):
self.tweaks = tweaks or {} self.tweaks = tweaks or {}
self.settings = settings self.settings = settings
self.delete_after_ingest = delete_after_ingest self.delete_after_ingest = delete_after_ingest
self.replace_duplicates = replace_duplicates
async def process_item( async def process_item(
self, upload_task: UploadTask, item: str, file_task: FileTask self, upload_task: UploadTask, item: str, file_task: FileTask
@ -554,37 +646,40 @@ class LangflowFileProcessor(TaskProcessor):
file_task.updated_at = time.time() file_task.updated_at = time.time()
try: try:
# Compute hash and check if already exists # Use the ORIGINAL filename stored in file_task (not the transformed temp path)
from utils.hash_utils import hash_id # This ensures we check/store the original filename with spaces, etc.
file_hash = hash_id(item) original_filename = file_task.filename or os.path.basename(item)
# Check if document already exists # Check if document with same filename already exists
opensearch_client = self.session_manager.get_user_opensearch_client( opensearch_client = self.session_manager.get_user_opensearch_client(
self.owner_user_id, self.jwt_token self.owner_user_id, self.jwt_token
) )
if await self.check_document_exists(file_hash, opensearch_client):
file_task.status = TaskStatus.COMPLETED filename_exists = await self.check_filename_exists(original_filename, opensearch_client)
file_task.result = {"status": "unchanged", "id": file_hash}
if filename_exists and not self.replace_duplicates:
# Duplicate exists and user hasn't confirmed replacement
file_task.status = TaskStatus.FAILED
file_task.error = f"File with name '{original_filename}' already exists"
file_task.updated_at = time.time() file_task.updated_at = time.time()
upload_task.successful_files += 1 upload_task.failed_files += 1
return return
elif filename_exists and self.replace_duplicates:
# Delete existing document before uploading new one
logger.info(f"Replacing existing document: {original_filename}")
await self.delete_document_by_filename(original_filename, opensearch_client)
# Read file content for processing # Read file content for processing
with open(item, 'rb') as f: with open(item, 'rb') as f:
content = f.read() content = f.read()
# Create file tuple for upload # Create file tuple for upload using ORIGINAL filename
temp_filename = os.path.basename(item) # This ensures the document is indexed with the original name
# Extract original filename from temp file suffix (remove tmp prefix) content_type, _ = mimetypes.guess_type(original_filename)
if "_" in temp_filename:
filename = temp_filename.split("_", 1)[1] # Get everything after first _
else:
filename = temp_filename
content_type, _ = mimetypes.guess_type(filename)
if not content_type: if not content_type:
content_type = 'application/octet-stream' content_type = 'application/octet-stream'
file_tuple = (filename, content, content_type) file_tuple = (original_filename, content, content_type)
# Get JWT token using same logic as DocumentFileProcessor # Get JWT token using same logic as DocumentFileProcessor
# This will handle anonymous JWT creation if needed # This will handle anonymous JWT creation if needed

View file

@ -20,7 +20,8 @@ class FileTask:
retry_count: int = 0 retry_count: int = 0
created_at: float = field(default_factory=time.time) created_at: float = field(default_factory=time.time)
updated_at: float = field(default_factory=time.time) updated_at: float = field(default_factory=time.time)
filename: Optional[str] = None # Original filename for display
@property @property
def duration_seconds(self) -> float: def duration_seconds(self) -> float:
"""Duration in seconds from creation to last update""" """Duration in seconds from creation to last update"""

View file

@ -1,6 +1,5 @@
import asyncio import asyncio
import random import random
from typing import Dict, Optional
import time import time
import uuid import uuid
@ -59,6 +58,7 @@ class TaskService:
file_paths: list, file_paths: list,
langflow_file_service, langflow_file_service,
session_manager, session_manager,
original_filenames: dict | None = None,
jwt_token: str = None, jwt_token: str = None,
owner_name: str = None, owner_name: str = None,
owner_email: str = None, owner_email: str = None,
@ -66,6 +66,7 @@ class TaskService:
tweaks: dict = None, tweaks: dict = None,
settings: dict = None, settings: dict = None,
delete_after_ingest: bool = True, delete_after_ingest: bool = True,
replace_duplicates: bool = False,
) -> str: ) -> str:
"""Create a new upload task for Langflow file processing with upload and ingest""" """Create a new upload task for Langflow file processing with upload and ingest"""
# Use LangflowFileProcessor with user context # Use LangflowFileProcessor with user context
@ -82,18 +83,35 @@ class TaskService:
tweaks=tweaks, tweaks=tweaks,
settings=settings, settings=settings,
delete_after_ingest=delete_after_ingest, delete_after_ingest=delete_after_ingest,
replace_duplicates=replace_duplicates,
) )
return await self.create_custom_task(user_id, file_paths, processor) return await self.create_custom_task(user_id, file_paths, processor, original_filenames)
async def create_custom_task(self, user_id: str, items: list, processor) -> str: async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: dict | None = None) -> str:
"""Create a new task with custom processor for any type of items""" """Create a new task with custom processor for any type of items"""
import os
# Store anonymous tasks under a stable key so they can be retrieved later # Store anonymous tasks under a stable key so they can be retrieved later
store_user_id = user_id or AnonymousUser().user_id store_user_id = user_id or AnonymousUser().user_id
task_id = str(uuid.uuid4()) task_id = str(uuid.uuid4())
# Create file tasks with original filenames if provided
normalized_originals = (
{str(k): v for k, v in original_filenames.items()} if original_filenames else {}
)
file_tasks = {
str(item): FileTask(
file_path=str(item),
filename=normalized_originals.get(
str(item), os.path.basename(str(item))
),
)
for item in items
}
upload_task = UploadTask( upload_task = UploadTask(
task_id=task_id, task_id=task_id,
total_files=len(items), total_files=len(items),
file_tasks={str(item): FileTask(file_path=str(item)) for item in items}, file_tasks=file_tasks,
) )
# Attach the custom processor to the task # Attach the custom processor to the task
@ -268,6 +286,7 @@ class TaskService:
"created_at": file_task.created_at, "created_at": file_task.created_at,
"updated_at": file_task.updated_at, "updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds, "duration_seconds": file_task.duration_seconds,
"filename": file_task.filename,
} }
# Count running and pending files # Count running and pending files
@ -322,6 +341,7 @@ class TaskService:
"created_at": file_task.created_at, "created_at": file_task.created_at,
"updated_at": file_task.updated_at, "updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds, "duration_seconds": file_task.duration_seconds,
"filename": file_task.filename,
} }
if file_task.status.value == "running": if file_task.status.value == "running":

View file

@ -0,0 +1,55 @@
"""
Utility functions for constructing OpenSearch queries consistently.
"""
from typing import Union, List
def build_filename_query(filename: str) -> dict:
"""
Build a standardized query for finding documents by filename.
Args:
filename: The exact filename to search for
Returns:
A dict containing the OpenSearch query body
"""
return {
"term": {
"filename": filename
}
}
def build_filename_search_body(filename: str, size: int = 1, source: Union[bool, List[str]] = False) -> dict:
"""
Build a complete search body for checking if a filename exists.
Args:
filename: The exact filename to search for
size: Number of results to return (default: 1)
source: Whether to include source fields, or list of specific fields to include (default: False)
Returns:
A dict containing the complete OpenSearch search body
"""
return {
"query": build_filename_query(filename),
"size": size,
"_source": source
}
def build_filename_delete_body(filename: str) -> dict:
"""
Build a delete-by-query body for removing all documents with a filename.
Args:
filename: The exact filename to delete
Returns:
A dict containing the OpenSearch delete-by-query body
"""
return {
"query": build_filename_query(filename)
}