Merge branch 'main' into feat-google-drive-folder-select

This commit is contained in:
Eric Hare 2025-10-06 12:07:54 -07:00 committed by GitHub
commit 22dc5b9b65
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
27 changed files with 2241 additions and 1478 deletions

2
.gitignore vendored
View file

@ -21,3 +21,5 @@ wheels/
.DS_Store .DS_Store
config/ config/
.docling.pid

View file

@ -43,7 +43,7 @@ services:
# build: # build:
# context: . # context: .
# dockerfile: Dockerfile.backend # dockerfile: Dockerfile.backend
# container_name: openrag-backend container_name: openrag-backend
depends_on: depends_on:
- langflow - langflow
environment: environment:

View file

@ -43,7 +43,7 @@ services:
# build: # build:
# context: . # context: .
# dockerfile: Dockerfile.backend # dockerfile: Dockerfile.backend
# container_name: openrag-backend container_name: openrag-backend
depends_on: depends_on:
- langflow - langflow
environment: environment:

View file

@ -0,0 +1,66 @@
"use client";
import { RotateCcw } from "lucide-react";
import type React from "react";
import { Button } from "./ui/button";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "./ui/dialog";
interface DuplicateHandlingDialogProps {
open: boolean;
onOpenChange: (open: boolean) => void;
onOverwrite: () => void | Promise<void>;
isLoading?: boolean;
}
export const DuplicateHandlingDialog: React.FC<
DuplicateHandlingDialogProps
> = ({ open, onOpenChange, onOverwrite, isLoading = false }) => {
const handleOverwrite = async () => {
await onOverwrite();
onOpenChange(false);
};
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-[450px]">
<DialogHeader>
<DialogTitle>Overwrite document</DialogTitle>
<DialogDescription className="pt-2 text-muted-foreground">
Overwriting will replace the existing document with another version.
This can't be undone.
</DialogDescription>
</DialogHeader>
<DialogFooter className="flex-row gap-2 justify-end">
<Button
type="button"
variant="ghost"
onClick={() => onOpenChange(false)}
disabled={isLoading}
size="sm"
>
Cancel
</Button>
<Button
type="button"
variant="default"
size="sm"
onClick={handleOverwrite}
disabled={isLoading}
className="flex items-center gap-2 !bg-accent-amber-foreground hover:!bg-foreground text-primary-foreground"
>
<RotateCcw className="h-3.5 w-3.5" />
Overwrite
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
);
};

File diff suppressed because it is too large Load diff

View file

@ -9,7 +9,7 @@ export default function IBMLogo(props: React.SVGProps<SVGSVGElement>) {
{...props} {...props}
> >
<title>IBM watsonx.ai Logo</title> <title>IBM watsonx.ai Logo</title>
<g clip-path="url(#clip0_2620_2081)"> <g clipPath="url(#clip0_2620_2081)">
<path <path
d="M13 12.0007C12.4477 12.0007 12 12.4484 12 13.0007C12 13.0389 12.0071 13.0751 12.0112 13.1122C10.8708 14.0103 9.47165 14.5007 8 14.5007C5.86915 14.5007 4 12.5146 4 10.2507C4 7.90722 5.9065 6.00072 8.25 6.00072H8.5V5.00072H8.25C5.3552 5.00072 3 7.35592 3 10.2507C3 11.1927 3.2652 12.0955 3.71855 12.879C2.3619 11.6868 1.5 9.94447 1.5 8.00072C1.5 6.94312 1.74585 5.93432 2.23095 5.00292L1.34375 4.54102C0.79175 5.60157 0.5 6.79787 0.5 8.00072C0.5 12.1362 3.8645 15.5007 8 15.5007C9.6872 15.5007 11.2909 14.9411 12.6024 13.9176C12.7244 13.9706 12.8586 14.0007 13 14.0007C13.5523 14.0007 14 13.553 14 13.0007C14 12.4484 13.5523 12.0007 13 12.0007Z" d="M13 12.0007C12.4477 12.0007 12 12.4484 12 13.0007C12 13.0389 12.0071 13.0751 12.0112 13.1122C10.8708 14.0103 9.47165 14.5007 8 14.5007C5.86915 14.5007 4 12.5146 4 10.2507C4 7.90722 5.9065 6.00072 8.25 6.00072H8.5V5.00072H8.25C5.3552 5.00072 3 7.35592 3 10.2507C3 11.1927 3.2652 12.0955 3.71855 12.879C2.3619 11.6868 1.5 9.94447 1.5 8.00072C1.5 6.94312 1.74585 5.93432 2.23095 5.00292L1.34375 4.54102C0.79175 5.60157 0.5 6.79787 0.5 8.00072C0.5 12.1362 3.8645 15.5007 8 15.5007C9.6872 15.5007 11.2909 14.9411 12.6024 13.9176C12.7244 13.9706 12.8586 14.0007 13 14.0007C13.5523 14.0007 14 13.553 14 13.0007C14 12.4484 13.5523 12.0007 13 12.0007Z"
fill="currentColor" fill="currentColor"

View file

@ -44,7 +44,7 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
placeholder={placeholder} placeholder={placeholder}
className={cn( className={cn(
"primary-input", "primary-input",
icon && "pl-9", icon && "!pl-9",
type === "password" && "!pr-8", type === "password" && "!pr-8",
icon ? inputClassName : className icon ? inputClassName : className
)} )}

View file

@ -0,0 +1,47 @@
import {
type UseMutationOptions,
useMutation,
useQueryClient,
} from "@tanstack/react-query";
export interface CancelTaskRequest {
taskId: string;
}
export interface CancelTaskResponse {
status: string;
task_id: string;
}
export const useCancelTaskMutation = (
options?: Omit<
UseMutationOptions<CancelTaskResponse, Error, CancelTaskRequest>,
"mutationFn"
>
) => {
const queryClient = useQueryClient();
async function cancelTask(
variables: CancelTaskRequest,
): Promise<CancelTaskResponse> {
const response = await fetch(`/api/tasks/${variables.taskId}/cancel`, {
method: "POST",
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.error || "Failed to cancel task");
}
return response.json();
}
return useMutation({
mutationFn: cancelTask,
onSuccess: () => {
// Invalidate tasks query to refresh the list
queryClient.invalidateQueries({ queryKey: ["tasks"] });
},
...options,
});
};

View file

@ -7,9 +7,6 @@ import {
type Nudge = string; type Nudge = string;
const DEFAULT_NUDGES = [ const DEFAULT_NUDGES = [
"Show me this quarter's top 10 deals",
"Summarize recent client interactions",
"Search OpenSearch for mentions of our competitors",
]; ];
export const useGetNudgesQuery = ( export const useGetNudgesQuery = (

View file

@ -0,0 +1,79 @@
import {
type UseQueryOptions,
useQuery,
useQueryClient,
} from "@tanstack/react-query";
export interface Task {
task_id: string;
status:
| "pending"
| "running"
| "processing"
| "completed"
| "failed"
| "error";
total_files?: number;
processed_files?: number;
successful_files?: number;
failed_files?: number;
running_files?: number;
pending_files?: number;
created_at: string;
updated_at: string;
duration_seconds?: number;
result?: Record<string, unknown>;
error?: string;
files?: Record<string, Record<string, unknown>>;
}
export interface TasksResponse {
tasks: Task[];
}
export const useGetTasksQuery = (
options?: Omit<UseQueryOptions<Task[]>, "queryKey" | "queryFn">
) => {
const queryClient = useQueryClient();
async function getTasks(): Promise<Task[]> {
const response = await fetch("/api/tasks");
if (!response.ok) {
throw new Error("Failed to fetch tasks");
}
const data: TasksResponse = await response.json();
return data.tasks || [];
}
const queryResult = useQuery(
{
queryKey: ["tasks"],
queryFn: getTasks,
refetchInterval: (query) => {
// Only poll if there are tasks with pending or running status
const data = query.state.data;
if (!data || data.length === 0) {
return false; // Stop polling if no tasks
}
const hasActiveTasks = data.some(
(task: Task) =>
task.status === "pending" ||
task.status === "running" ||
task.status === "processing"
);
return hasActiveTasks ? 3000 : false; // Poll every 3 seconds if active tasks exist
},
refetchIntervalInBackground: true,
staleTime: 0, // Always consider data stale to ensure fresh updates
gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
...options,
},
queryClient,
);
return queryResult;
};

View file

@ -1,201 +1,204 @@
"use client"; "use client";
import { ArrowLeft, Check, Copy, Loader2, Search, X } from "lucide-react"; import { ArrowLeft, Check, Copy, Loader2, Search, X } from "lucide-react";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
import { useRouter, useSearchParams } from "next/navigation"; import { useRouter, useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useState } from "react";
// import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { ProtectedRoute } from "@/components/protected-route"; import { ProtectedRoute } from "@/components/protected-route";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"; import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useLayout } from "@/contexts/layout-context"; import { useLayout } from "@/contexts/layout-context";
import { useTask } from "@/contexts/task-context"; import { useTask } from "@/contexts/task-context";
import { import {
type ChunkResult, type ChunkResult,
type File, type File,
useGetSearchQuery, useGetSearchQuery,
} from "../../api/queries/useGetSearchQuery"; } from "../../api/queries/useGetSearchQuery";
// import { Label } from "@/components/ui/label";
// import { Checkbox } from "@/components/ui/checkbox";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
const getFileTypeLabel = (mimetype: string) => { const getFileTypeLabel = (mimetype: string) => {
if (mimetype === "application/pdf") return "PDF"; if (mimetype === "application/pdf") return "PDF";
if (mimetype === "text/plain") return "Text"; if (mimetype === "text/plain") return "Text";
if (mimetype === "application/msword") return "Word Document"; if (mimetype === "application/msword") return "Word Document";
return "Unknown"; return "Unknown";
}; };
function ChunksPageContent() { function ChunksPageContent() {
const router = useRouter(); const router = useRouter();
const searchParams = useSearchParams(); const searchParams = useSearchParams();
const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
useKnowledgeFilter(); useKnowledgeFilter();
const { isMenuOpen } = useTask(); const { isMenuOpen } = useTask();
const { totalTopOffset } = useLayout(); const { totalTopOffset } = useLayout();
const filename = searchParams.get("filename"); const filename = searchParams.get("filename");
const [chunks, setChunks] = useState<ChunkResult[]>([]); const [chunks, setChunks] = useState<ChunkResult[]>([]);
const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState< const [chunksFilteredByQuery, setChunksFilteredByQuery] = useState<
ChunkResult[] ChunkResult[]
>([]); >([]);
const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set()); const [selectedChunks, setSelectedChunks] = useState<Set<number>>(new Set());
const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState< const [activeCopiedChunkIndex, setActiveCopiedChunkIndex] = useState<
number | null number | null
>(null); >(null);
// Calculate average chunk length // Calculate average chunk length
const averageChunkLength = useMemo( const averageChunkLength = useMemo(
() => () =>
chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) / chunks.reduce((acc, chunk) => acc + chunk.text.length, 0) /
chunks.length || 0, chunks.length || 0,
[chunks] [chunks],
); );
const [selectAll, setSelectAll] = useState(false); const [selectAll, setSelectAll] = useState(false);
const [queryInputText, setQueryInputText] = useState( const [queryInputText, setQueryInputText] = useState(
parsedFilterData?.query ?? "" parsedFilterData?.query ?? "",
); );
// Use the same search query as the knowledge page, but we'll filter for the specific file // Use the same search query as the knowledge page, but we'll filter for the specific file
const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData); const { data = [], isFetching } = useGetSearchQuery("*", parsedFilterData);
useEffect(() => { useEffect(() => {
if (queryInputText === "") { if (queryInputText === "") {
setChunksFilteredByQuery(chunks); setChunksFilteredByQuery(chunks);
} else { } else {
setChunksFilteredByQuery( setChunksFilteredByQuery(
chunks.filter((chunk) => chunks.filter((chunk) =>
chunk.text.toLowerCase().includes(queryInputText.toLowerCase()) chunk.text.toLowerCase().includes(queryInputText.toLowerCase()),
) ),
); );
} }
}, [queryInputText, chunks]); }, [queryInputText, chunks]);
const handleCopy = useCallback((text: string, index: number) => { const handleCopy = useCallback((text: string, index: number) => {
// Trim whitespace and remove new lines/tabs for cleaner copy // Trim whitespace and remove new lines/tabs for cleaner copy
navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, "")); navigator.clipboard.writeText(text.trim().replace(/[\n\r\t]/gm, ""));
setActiveCopiedChunkIndex(index); setActiveCopiedChunkIndex(index);
setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds setTimeout(() => setActiveCopiedChunkIndex(null), 10 * 1000); // 10 seconds
}, []); }, []);
const fileData = (data as File[]).find( const fileData = (data as File[]).find(
(file: File) => file.filename === filename (file: File) => file.filename === filename,
); );
// Extract chunks for the specific file // Extract chunks for the specific file
useEffect(() => { useEffect(() => {
if (!filename || !(data as File[]).length) { if (!filename || !(data as File[]).length) {
setChunks([]); setChunks([]);
return; return;
} }
setChunks( setChunks(
fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || [] fileData?.chunks?.map((chunk, i) => ({ ...chunk, index: i + 1 })) || [],
); );
}, [data, filename]); }, [data, filename]);
// Set selected state for all checkboxes when selectAll changes // Set selected state for all checkboxes when selectAll changes
useEffect(() => { useEffect(() => {
if (selectAll) { if (selectAll) {
setSelectedChunks(new Set(chunks.map((_, index) => index))); setSelectedChunks(new Set(chunks.map((_, index) => index)));
} else { } else {
setSelectedChunks(new Set()); setSelectedChunks(new Set());
} }
}, [selectAll, setSelectedChunks, chunks]); }, [selectAll, setSelectedChunks, chunks]);
const handleBack = useCallback(() => { const handleBack = useCallback(() => {
router.push("/knowledge"); router.push("/knowledge");
}, [router]); }, [router]);
// const handleChunkCardCheckboxChange = useCallback( // const handleChunkCardCheckboxChange = useCallback(
// (index: number) => { // (index: number) => {
// setSelectedChunks((prevSelected) => { // setSelectedChunks((prevSelected) => {
// const newSelected = new Set(prevSelected); // const newSelected = new Set(prevSelected);
// if (newSelected.has(index)) { // if (newSelected.has(index)) {
// newSelected.delete(index); // newSelected.delete(index);
// } else { // } else {
// newSelected.add(index); // newSelected.add(index);
// } // }
// return newSelected; // return newSelected;
// }); // });
// }, // },
// [setSelectedChunks] // [setSelectedChunks]
// ); // );
if (!filename) { if (!filename) {
return ( return (
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<Search className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50" /> <Search className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50" />
<p className="text-lg text-muted-foreground">No file specified</p> <p className="text-lg text-muted-foreground">No file specified</p>
<p className="text-sm text-muted-foreground/70 mt-2"> <p className="text-sm text-muted-foreground/70 mt-2">
Please select a file from the knowledge page Please select a file from the knowledge page
</p> </p>
</div> </div>
</div> </div>
); );
} }
return ( return (
<div <div
className={`fixed inset-0 md:left-72 flex flex-row transition-all duration-300 ${ className={`fixed inset-0 md:left-72 flex flex-row transition-all duration-300 ${
isMenuOpen && isPanelOpen isMenuOpen && isPanelOpen
? "md:right-[704px]" ? "md:right-[704px]"
: // Both open: 384px (menu) + 320px (KF panel) : // Both open: 384px (menu) + 320px (KF panel)
isMenuOpen isMenuOpen
? "md:right-96" ? "md:right-96"
: // Only menu open: 384px : // Only menu open: 384px
isPanelOpen isPanelOpen
? "md:right-80" ? "md:right-80"
: // Only KF panel open: 320px : // Only KF panel open: 320px
"md:right-6" // Neither open: 24px "md:right-6" // Neither open: 24px
}`} }`}
style={{ top: `${totalTopOffset}px` }} style={{ top: `${totalTopOffset}px` }}
> >
<div className="flex-1 flex flex-col min-h-0 px-6 py-6"> <div className="flex-1 flex flex-col min-h-0 px-6 py-6">
{/* Header */} {/* Header */}
<div className="flex flex-col mb-6"> <div className="flex flex-col mb-6">
<div className="flex flex-row items-center gap-3 mb-6"> <div className="flex flex-row items-center gap-3 mb-6">
<Button variant="ghost" onClick={handleBack} size="sm"> <Button variant="ghost" onClick={handleBack} size="sm">
<ArrowLeft size={24} /> <ArrowLeft size={24} />
</Button> </Button>
<h1 className="text-lg font-semibold"> <h1 className="text-lg font-semibold">
{/* Removes file extension from filename */} {/* Removes file extension from filename */}
{filename.replace(/\.[^/.]+$/, "")} {filename.replace(/\.[^/.]+$/, "")}
</h1> </h1>
</div> </div>
<div className="flex flex-col items-start mt-2"> <div className="flex flex-col items-start mt-2">
<div className="flex-1 flex items-center gap-2 w-full max-w-[640px]"> <div className="flex-1 flex items-center gap-2 w-full max-w-[640px]">
<div className="primary-input min-h-10 !flex items-center flex-nowrap focus-within:border-foreground transition-colors !p-[0.3rem]"> <div className="primary-input min-h-10 !flex items-center flex-nowrap focus-within:border-foreground transition-colors !p-[0.3rem]">
{selectedFilter?.name && ( {selectedFilter?.name && (
<div <div
className={`flex items-center gap-1 h-full px-1.5 py-0.5 mr-1 rounded max-w-[25%] ${ className={`flex items-center gap-1 h-full px-1.5 py-0.5 mr-1 rounded max-w-[25%] ${
filterAccentClasses[parsedFilterData?.color || "zinc"] filterAccentClasses[parsedFilterData?.color || "zinc"]
}`} }`}
> >
<span className="truncate">{selectedFilter?.name}</span> <span className="truncate">{selectedFilter?.name}</span>
<X <X
aria-label="Remove filter" aria-label="Remove filter"
className="h-4 w-4 flex-shrink-0 cursor-pointer" className="h-4 w-4 flex-shrink-0 cursor-pointer"
onClick={() => setSelectedFilter(null)} onClick={() => setSelectedFilter(null)}
/> />
</div> </div>
)} )}
<Search <Search
className="h-4 w-4 ml-1 flex-shrink-0 text-placeholder-foreground" className="h-4 w-4 ml-1 flex-shrink-0 text-placeholder-foreground"
strokeWidth={1.5} strokeWidth={1.5}
/> />
<input <input
className="bg-transparent w-full h-full ml-2 focus:outline-none focus-visible:outline-none font-mono placeholder:font-mono" className="bg-transparent w-full h-full ml-2 focus:outline-none focus-visible:outline-none font-mono placeholder:font-mono"
name="search-query" name="search-query"
id="search-query" id="search-query"
type="text" type="text"
placeholder="Enter your search query..." placeholder="Enter your search query..."
onChange={(e) => setQueryInputText(e.target.value)} onChange={(e) => setQueryInputText(e.target.value)}
value={queryInputText} value={queryInputText}
/> />
</div> </div>
</div> </div>
{/* <div className="flex items-center pl-4 gap-2"> {/* <div className="flex items-center pl-4 gap-2">
<Checkbox <Checkbox
id="selectAllChunks" id="selectAllChunks"
checked={selectAll} checked={selectAll}
@ -210,39 +213,39 @@ function ChunksPageContent() {
Select all Select all
</Label> </Label>
</div> */} </div> */}
</div> </div>
</div> </div>
{/* Content Area - matches knowledge page structure */} {/* Content Area - matches knowledge page structure */}
<div className="flex-1 overflow-scroll pr-6"> <div className="flex-1 overflow-scroll pr-6">
{isFetching ? ( {isFetching ? (
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" /> <Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" />
<p className="text-lg text-muted-foreground"> <p className="text-lg text-muted-foreground">
Loading chunks... Loading chunks...
</p> </p>
</div> </div>
</div> </div>
) : chunks.length === 0 ? ( ) : chunks.length === 0 ? (
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<p className="text-xl font-semibold mb-2">No knowledge</p> <p className="text-xl font-semibold mb-2">No knowledge</p>
<p className="text-sm text-secondary-foreground"> <p className="text-sm text-secondary-foreground">
Clear the knowledge filter or return to the knowledge page Clear the knowledge filter or return to the knowledge page
</p> </p>
</div> </div>
</div> </div>
) : ( ) : (
<div className="space-y-4 pb-6"> <div className="space-y-4 pb-6">
{chunksFilteredByQuery.map((chunk, index) => ( {chunksFilteredByQuery.map((chunk, index) => (
<div <div
key={chunk.filename + index} key={chunk.filename + index}
className="bg-muted rounded-lg p-4 border border-border/50" className="bg-muted rounded-lg p-4 border border-border/50"
> >
<div className="flex items-center justify-between mb-2"> <div className="flex items-center justify-between mb-2">
<div className="flex items-center gap-3"> <div className="flex items-center gap-3">
{/* <div> {/* <div>
<Checkbox <Checkbox
checked={selectedChunks.has(index)} checked={selectedChunks.has(index)}
onCheckedChange={() => onCheckedChange={() =>
@ -250,73 +253,73 @@ function ChunksPageContent() {
} }
/> />
</div> */} </div> */}
<span className="text-sm font-bold"> <span className="text-sm font-bold">
Chunk {chunk.index} Chunk {chunk.index}
</span> </span>
<span className="bg-background p-1 rounded text-xs text-muted-foreground/70"> <span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
{chunk.text.length} chars {chunk.text.length} chars
</span> </span>
<div className="py-1"> <div className="py-1">
<Button <Button
onClick={() => handleCopy(chunk.text, index)} onClick={() => handleCopy(chunk.text, index)}
variant="ghost" variant="ghost"
size="sm" size="sm"
> >
{activeCopiedChunkIndex === index ? ( {activeCopiedChunkIndex === index ? (
<Check className="text-muted-foreground" /> <Check className="text-muted-foreground" />
) : ( ) : (
<Copy className="text-muted-foreground" /> <Copy className="text-muted-foreground" />
)} )}
</Button> </Button>
</div> </div>
</div> </div>
<span className="bg-background p-1 rounded text-xs text-muted-foreground/70"> <span className="bg-background p-1 rounded text-xs text-muted-foreground/70">
{chunk.score.toFixed(2)} score {chunk.score.toFixed(2)} score
</span> </span>
{/* TODO: Update to use active toggle */} {/* TODO: Update to use active toggle */}
{/* <span className="px-2 py-1 text-green-500"> {/* <span className="px-2 py-1 text-green-500">
<Switch <Switch
className="ml-2 bg-green-500" className="ml-2 bg-green-500"
checked={true} checked={true}
/> />
Active Active
</span> */} </span> */}
</div> </div>
<blockquote className="text-sm text-muted-foreground leading-relaxed ml-1.5"> <blockquote className="text-sm text-muted-foreground leading-relaxed ml-1.5">
{chunk.text} {chunk.text}
</blockquote> </blockquote>
</div> </div>
))} ))}
</div> </div>
)} )}
</div> </div>
</div> </div>
{/* Right panel - Summary (TODO), Technical details, */} {/* Right panel - Summary (TODO), Technical details, */}
{chunks.length > 0 && ( {chunks.length > 0 && (
<div className="w-[320px] py-20 px-2"> <div className="w-[320px] py-20 px-2">
<div className="mb-8"> <div className="mb-8">
<h2 className="text-xl font-semibold mt-3 mb-4"> <h2 className="text-xl font-semibold mt-3 mb-4">
Technical details Technical details
</h2> </h2>
<dl> <dl>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground"> <dt className="text-sm/6 text-muted-foreground">
Total chunks Total chunks
</dt> </dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{chunks.length} {chunks.length}
</dd> </dd>
</div> </div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Avg length</dt> <dt className="text-sm/6 text-muted-foreground">Avg length</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{averageChunkLength.toFixed(0)} chars {averageChunkLength.toFixed(0)} chars
</dd> </dd>
</div> </div>
{/* TODO: Uncomment after data is available */} {/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Process time</dt> <dt className="text-sm/6 text-muted-foreground">Process time</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd> </dd>
@ -326,79 +329,79 @@ function ChunksPageContent() {
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
</dd> </dd>
</div> */} </div> */}
</dl> </dl>
</div> </div>
<div className="mb-8"> <div className="mb-8">
<h2 className="text-xl font-semibold mt-2 mb-3"> <h2 className="text-xl font-semibold mt-2 mb-3">
Original document Original document
</h2> </h2>
<dl> <dl>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Name</dt> <dt className="text-sm/6 text-muted-foreground">Name</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.filename} {fileData?.filename}
</dd> </dd>
</div> */} </div> */}
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Type</dt> <dt className="text-sm/6 text-muted-foreground">Type</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"} {fileData ? getFileTypeLabel(fileData.mimetype) : "Unknown"}
</dd> </dd>
</div> </div>
<div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Size</dt> <dt className="text-sm/6 text-muted-foreground">Size</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
{fileData?.size {fileData?.size
? `${Math.round(fileData.size / 1024)} KB` ? `${Math.round(fileData.size / 1024)} KB`
: "Unknown"} : "Unknown"}
</dd> </dd>
</div> </div>
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Uploaded</dt> <dt className="text-sm/6 text-muted-foreground">Uploaded</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A N/A
</dd> </dd>
</div> */} </div> */}
{/* TODO: Uncomment after data is available */} {/* TODO: Uncomment after data is available */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Source</dt> <dt className="text-sm/6 text-muted-foreground">Source</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"></dd>
</div> */} </div> */}
{/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5"> {/* <div className="sm:grid sm:grid-cols-3 sm:gap-4 sm:px-0 mb-2.5">
<dt className="text-sm/6 text-muted-foreground">Updated</dt> <dt className="text-sm/6 text-muted-foreground">Updated</dt>
<dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0"> <dd className="mt-1 text-sm/6 text-gray-100 sm:col-span-2 sm:mt-0">
N/A N/A
</dd> </dd>
</div> */} </div> */}
</dl> </dl>
</div> </div>
</div> </div>
)} )}
</div> </div>
); );
} }
function ChunksPage() { function ChunksPage() {
return ( return (
<Suspense <Suspense
fallback={ fallback={
<div className="flex items-center justify-center h-64"> <div className="flex items-center justify-center h-64">
<div className="text-center"> <div className="text-center">
<Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" /> <Loader2 className="h-12 w-12 mx-auto mb-4 text-muted-foreground/50 animate-spin" />
<p className="text-lg text-muted-foreground">Loading...</p> <p className="text-lg text-muted-foreground">Loading...</p>
</div> </div>
</div> </div>
} }
> >
<ChunksPageContent /> <ChunksPageContent />
</Suspense> </Suspense>
); );
} }
export default function ProtectedChunksPage() { export default function ProtectedChunksPage() {
return ( return (
<ProtectedRoute> <ProtectedRoute>
<ChunksPage /> <ChunksPage />
</ProtectedRoute> </ProtectedRoute>
); );
} }

View file

@ -1,10 +1,24 @@
"use client"; "use client";
import type { ColDef } from "ag-grid-community"; import type { ColDef, GetRowIdParams } from "ag-grid-community";
import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react"; import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react"; import {
Building2,
Cloud,
Globe,
HardDrive,
Search,
Trash2,
X,
} from "lucide-react";
import { useRouter } from "next/navigation"; import { useRouter } from "next/navigation";
import { type ChangeEvent, useCallback, useRef, useState } from "react"; import {
type ChangeEvent,
useCallback,
useEffect,
useRef,
useState,
} from "react";
import { SiGoogledrive } from "react-icons/si"; import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb"; import { TbBrandOnedrive } from "react-icons/tb";
import { KnowledgeDropdown } from "@/components/knowledge-dropdown"; import { KnowledgeDropdown } from "@/components/knowledge-dropdown";
@ -18,14 +32,16 @@ import "@/components/AgGrid/registerAgGridModules";
import "@/components/AgGrid/agGridStyles.css"; import "@/components/AgGrid/agGridStyles.css";
import { toast } from "sonner"; import { toast } from "sonner";
import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown"; import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
import { StatusBadge } from "@/components/ui/status-badge"; import { StatusBadge } from "@/components/ui/status-badge";
import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog"; import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog";
import { useDeleteDocument } from "../api/mutations/useDeleteDocument"; import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
import { filterAccentClasses } from "@/components/knowledge-filter-panel";
// Function to get the appropriate icon for a connector type // Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) { function getSourceIcon(connectorType?: string) {
switch (connectorType) { switch (connectorType) {
case "url":
return <Globe className="h-4 w-4 text-muted-foreground flex-shrink-0" />;
case "google_drive": case "google_drive":
return ( return (
<SiGoogledrive className="h-4 w-4 text-foreground flex-shrink-0" /> <SiGoogledrive className="h-4 w-4 text-foreground flex-shrink-0" />
@ -47,7 +63,7 @@ function getSourceIcon(connectorType?: string) {
function SearchPage() { function SearchPage() {
const router = useRouter(); const router = useRouter();
const { isMenuOpen, files: taskFiles } = useTask(); const { isMenuOpen, files: taskFiles, refreshTasks } = useTask();
const { totalTopOffset } = useLayout(); const { totalTopOffset } = useLayout();
const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } = const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
useKnowledgeFilter(); useKnowledgeFilter();
@ -56,15 +72,14 @@ function SearchPage() {
const deleteDocumentMutation = useDeleteDocument(); const deleteDocumentMutation = useDeleteDocument();
const { data = [], isFetching } = useGetSearchQuery( useEffect(() => {
refreshTasks();
}, [refreshTasks]);
const { data: searchData = [], isFetching } = useGetSearchQuery(
parsedFilterData?.query || "*", parsedFilterData?.query || "*",
parsedFilterData parsedFilterData,
); );
const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
};
// Convert TaskFiles to File format and merge with backend results // Convert TaskFiles to File format and merge with backend results
const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => { const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
return { return {
@ -77,13 +92,32 @@ function SearchPage() {
}; };
}); });
const backendFiles = data as File[]; // Create a map of task files by filename for quick lookup
const taskFileMap = new Map(
taskFilesAsFiles.map((file) => [file.filename, file]),
);
// Override backend files with task file status if they exist
const backendFiles = (searchData as File[])
.map((file) => {
const taskFile = taskFileMap.get(file.filename);
if (taskFile) {
// Override backend file with task file data (includes status)
return { ...file, ...taskFile };
}
return file;
})
.filter((file) => {
// Only filter out files that are currently processing AND in taskFiles
const taskFile = taskFileMap.get(file.filename);
return !taskFile || taskFile.status !== "processing";
});
const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
return ( return (
taskFile.status !== "active" && taskFile.status !== "active" &&
!backendFiles.some( !backendFiles.some(
(backendFile) => backendFile.filename === taskFile.filename (backendFile) => backendFile.filename === taskFile.filename,
) )
); );
}); });
@ -91,41 +125,60 @@ function SearchPage() {
// Combine task files first, then backend files // Combine task files first, then backend files
const fileResults = [...backendFiles, ...filteredTaskFiles]; const fileResults = [...backendFiles, ...filteredTaskFiles];
const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
};
const gridRef = useRef<AgGridReact>(null); const gridRef = useRef<AgGridReact>(null);
const [columnDefs] = useState<ColDef<File>[]>([ const columnDefs = [
{ {
field: "filename", field: "filename",
headerName: "Source", headerName: "Source",
checkboxSelection: true, checkboxSelection: (params: CustomCellRendererProps<File>) =>
(params?.data?.status || "active") === "active",
headerCheckboxSelection: true, headerCheckboxSelection: true,
initialFlex: 2, initialFlex: 2,
minWidth: 220, minWidth: 220,
cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => { cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => {
// Read status directly from data on each render
const status = data?.status || "active";
const isActive = status === "active";
console.log(data?.filename, status, "a");
return ( return (
<button <div className="flex items-center overflow-hidden w-full">
type="button" <div
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left w-full" className={`transition-opacity duration-200 ${
onClick={() => { isActive ? "w-0" : "w-7"
router.push( }`}
`/knowledge/chunks?filename=${encodeURIComponent( ></div>
data?.filename ?? "" <button
)}` type="button"
); className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left flex-1 overflow-hidden"
}} onClick={() => {
> if (!isActive) {
{getSourceIcon(data?.connector_type)} return;
<span className="font-medium text-foreground truncate"> }
{value} router.push(
</span> `/knowledge/chunks?filename=${encodeURIComponent(
</button> data?.filename ?? "",
)}`,
);
}}
>
{getSourceIcon(data?.connector_type)}
<span className="font-medium text-foreground truncate">
{value}
</span>
</button>
</div>
); );
}, },
}, },
{ {
field: "size", field: "size",
headerName: "Size", headerName: "Size",
valueFormatter: (params) => valueFormatter: (params: CustomCellRendererProps<File>) =>
params.value ? `${Math.round(params.value / 1024)} KB` : "-", params.value ? `${Math.round(params.value / 1024)} KB` : "-",
}, },
{ {
@ -135,13 +188,14 @@ function SearchPage() {
{ {
field: "owner", field: "owner",
headerName: "Owner", headerName: "Owner",
valueFormatter: (params) => valueFormatter: (params: CustomCellRendererProps<File>) =>
params.data?.owner_name || params.data?.owner_email || "—", params.data?.owner_name || params.data?.owner_email || "—",
}, },
{ {
field: "chunkCount", field: "chunkCount",
headerName: "Chunks", headerName: "Chunks",
valueFormatter: (params) => params.data?.chunkCount?.toString() || "-", valueFormatter: (params: CustomCellRendererProps<File>) =>
params.data?.chunkCount?.toString() || "-",
}, },
{ {
field: "avgScore", field: "avgScore",
@ -159,6 +213,7 @@ function SearchPage() {
field: "status", field: "status",
headerName: "Status", headerName: "Status",
cellRenderer: ({ data }: CustomCellRendererProps<File>) => { cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
console.log(data?.filename, data?.status, "b");
// Default to 'active' status if no status is provided // Default to 'active' status if no status is provided
const status = data?.status || "active"; const status = data?.status || "active";
return <StatusBadge status={status} />; return <StatusBadge status={status} />;
@ -166,6 +221,10 @@ function SearchPage() {
}, },
{ {
cellRenderer: ({ data }: CustomCellRendererProps<File>) => { cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
const status = data?.status || "active";
if (status !== "active") {
return null;
}
return <KnowledgeActionsDropdown filename={data?.filename || ""} />; return <KnowledgeActionsDropdown filename={data?.filename || ""} />;
}, },
cellStyle: { cellStyle: {
@ -182,7 +241,7 @@ function SearchPage() {
sortable: false, sortable: false,
initialFlex: 0, initialFlex: 0,
}, },
]); ];
const defaultColDef: ColDef<File> = { const defaultColDef: ColDef<File> = {
resizable: false, resizable: false,
@ -204,7 +263,7 @@ function SearchPage() {
try { try {
// Delete each file individually since the API expects one filename at a time // Delete each file individually since the API expects one filename at a time
const deletePromises = selectedRows.map((row) => const deletePromises = selectedRows.map((row) =>
deleteDocumentMutation.mutateAsync({ filename: row.filename }) deleteDocumentMutation.mutateAsync({ filename: row.filename }),
); );
await Promise.all(deletePromises); await Promise.all(deletePromises);
@ -212,7 +271,7 @@ function SearchPage() {
toast.success( toast.success(
`Successfully deleted ${selectedRows.length} document${ `Successfully deleted ${selectedRows.length} document${
selectedRows.length > 1 ? "s" : "" selectedRows.length > 1 ? "s" : ""
}` }`,
); );
setSelectedRows([]); setSelectedRows([]);
setShowBulkDeleteDialog(false); setShowBulkDeleteDialog(false);
@ -225,7 +284,7 @@ function SearchPage() {
toast.error( toast.error(
error instanceof Error error instanceof Error
? error.message ? error.message
: "Failed to delete some documents" : "Failed to delete some documents",
); );
} }
}; };
@ -270,16 +329,13 @@ function SearchPage() {
/> />
</div> </div>
)} )}
<Search <Search className="h-4 w-4 ml-1 flex-shrink-0 text-placeholder-foreground" />
className="h-4 w-4 ml-1 flex-shrink-0 text-placeholder-foreground"
strokeWidth={1.5}
/>
<input <input
className="bg-transparent w-full h-full ml-2 focus:outline-none focus-visible:outline-none font-mono placeholder:font-mono" className="bg-transparent w-full h-full ml-2 focus:outline-none focus-visible:outline-none font-mono placeholder:font-mono"
name="search-query" name="search-query"
id="search-query" id="search-query"
type="text" type="text"
placeholder="Search your documents..." placeholder="Enter your search query..."
onChange={handleTableSearch} onChange={handleTableSearch}
/> />
</div> </div>
@ -317,7 +373,7 @@ function SearchPage() {
</div> </div>
<AgGridReact <AgGridReact
className="w-full overflow-auto" className="w-full overflow-auto"
columnDefs={columnDefs} columnDefs={columnDefs as ColDef<File>[]}
defaultColDef={defaultColDef} defaultColDef={defaultColDef}
loading={isFetching} loading={isFetching}
ref={gridRef} ref={gridRef}
@ -325,7 +381,7 @@ function SearchPage() {
rowSelection="multiple" rowSelection="multiple"
rowMultiSelectWithClick={false} rowMultiSelectWithClick={false}
suppressRowClickSelection={true} suppressRowClickSelection={true}
getRowId={(params) => params.data.filename} getRowId={(params: GetRowIdParams<File>) => params.data?.filename}
domLayout="normal" domLayout="normal"
onSelectionChanged={onSelectionChanged} onSelectionChanged={onSelectionChanged}
noRowsOverlayComponent={() => ( noRowsOverlayComponent={() => (

View file

@ -633,30 +633,54 @@ function KnowledgeSourcesPage() {
</div> </div>
{/* Conditional Sync Settings or No-Auth Message */} {/* Conditional Sync Settings or No-Auth Message */}
{ {
isNoAuthMode ? ( isNoAuthMode ? (
<Card className="border-yellow-500/50 bg-yellow-500/5"> <Card className="border-yellow-500">
<CardHeader> <CardHeader>
<CardTitle className="text-lg text-yellow-600"> <CardTitle className="text-lg">
Cloud connectors are only available with auth mode enabled Cloud connectors require authentication
</CardTitle> </CardTitle>
<CardDescription className="text-sm"> <CardDescription className="text-sm">
Please provide the following environment variables and Add the Google OAuth variables below to your <code>.env</code>{" "}
restart: then restart the OpenRAG containers.
</CardDescription> </CardDescription>
</CardHeader> </CardHeader>
<CardContent> <CardContent>
<div className="bg-muted rounded-md p-4 font-mono text-sm"> <div className="bg-muted rounded-md p-4 font-mono text-sm">
<div className="text-muted-foreground mb-2"> <div className="text-muted-foreground">
# make here <div>
https://console.cloud.google.com/apis/credentials <span className="mr-3 text-placeholder-foreground">
</div> 27
<div>GOOGLE_OAUTH_CLIENT_ID=</div> </span>
<div>GOOGLE_OAUTH_CLIENT_SECRET=</div> <span># Google OAuth</span>
</div> </div>
</CardContent> <div>
</Card> <span className="mr-3 text-placeholder-foreground">
) : null 28
</span>
<span># Create credentials here:</span>
</div>
<div>
<span className="mr-3 text-placeholder-foreground">
29
</span>
<span>
# https://console.cloud.google.com/apis/credentials
</span>
</div>
</div>
<div>
<span className="mr-3 text-placeholder-foreground">30</span>
<span>GOOGLE_OAUTH_CLIENT_ID=</span>
</div>
<div>
<span className="mr-3 text-placeholder-foreground">31</span>
<span>GOOGLE_OAUTH_CLIENT_SECRET=</span>
</div>
</div>
</CardContent>
</Card>
) : null
// <div className="flex items-center justify-between py-4"> // <div className="flex items-center justify-between py-4">
// <div> // <div>
// <h3 className="text-lg font-medium">Sync Settings</h3> // <h3 className="text-lg font-medium">Sync Settings</h3>

View file

@ -1,6 +1,6 @@
"use client" "use client"
import { useState } from 'react' import { useEffect, useState } from 'react'
import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react' import { Bell, CheckCircle, XCircle, Clock, Loader2, ChevronDown, ChevronUp, X } from 'lucide-react'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
@ -8,9 +8,16 @@ import { Badge } from '@/components/ui/badge'
import { useTask, Task } from '@/contexts/task-context' import { useTask, Task } from '@/contexts/task-context'
export function TaskNotificationMenu() { export function TaskNotificationMenu() {
const { tasks, isFetching, isMenuOpen, cancelTask } = useTask() const { tasks, isFetching, isMenuOpen, isRecentTasksExpanded, cancelTask } = useTask()
const [isExpanded, setIsExpanded] = useState(false) const [isExpanded, setIsExpanded] = useState(false)
// Sync local state with context state
useEffect(() => {
if (isRecentTasksExpanded) {
setIsExpanded(true)
}
}, [isRecentTasksExpanded])
// Don't render if menu is closed // Don't render if menu is closed
if (!isMenuOpen) return null if (!isMenuOpen) return null

View file

@ -1,26 +1,16 @@
interface AnimatedProcessingIconProps { import type { SVGProps } from "react";
className?: string;
size?: number;
}
export const AnimatedProcessingIcon = ({ export const AnimatedProcessingIcon = (props: SVGProps<SVGSVGElement>) => {
className = "", return (
size = 10, <svg
}: AnimatedProcessingIconProps) => { viewBox="0 0 8 12"
const width = Math.round((size * 6) / 10); fill="none"
const height = size; xmlns="http://www.w3.org/2000/svg"
{...props}
return ( >
<svg <title>Processing</title>
width={width} <style>
height={height} {`
viewBox="0 0 6 10"
fill="none"
xmlns="http://www.w3.org/2000/svg"
className={className}
>
<style>
{`
.dot-1 { animation: pulse-wave 1.5s infinite; animation-delay: 0s; } .dot-1 { animation: pulse-wave 1.5s infinite; animation-delay: 0s; }
.dot-2 { animation: pulse-wave 1.5s infinite; animation-delay: 0.1s; } .dot-2 { animation: pulse-wave 1.5s infinite; animation-delay: 0.1s; }
.dot-3 { animation: pulse-wave 1.5s infinite; animation-delay: 0.2s; } .dot-3 { animation: pulse-wave 1.5s infinite; animation-delay: 0.2s; }
@ -30,20 +20,18 @@ export const AnimatedProcessingIcon = ({
@keyframes pulse-wave { @keyframes pulse-wave {
0%, 60%, 100% { 0%, 60%, 100% {
opacity: 0.25; opacity: 0.25;
transform: scale(1);
} }
30% { 30% {
opacity: 1; opacity: 1;
transform: scale(1.2);
} }
} }
`} `}
</style> </style>
<circle className="dot-1" cx="1" cy="5" r="1" fill="currentColor" /> <circle className="dot-1" cx="2" cy="6" r="1" fill="currentColor" />
<circle className="dot-2" cx="1" cy="9" r="1" fill="currentColor" /> <circle className="dot-2" cx="2" cy="10" r="1" fill="currentColor" />
<circle className="dot-3" cx="5" cy="1" r="1" fill="currentColor" /> <circle className="dot-3" cx="6" cy="2" r="1" fill="currentColor" />
<circle className="dot-4" cx="5" cy="5" r="1" fill="currentColor" /> <circle className="dot-4" cx="6" cy="6" r="1" fill="currentColor" />
<circle className="dot-5" cx="5" cy="9" r="1" fill="currentColor" /> <circle className="dot-5" cx="6" cy="10" r="1" fill="currentColor" />
</svg> </svg>
); );
}; };

View file

@ -50,7 +50,7 @@ export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
}`} }`}
> >
{status === "processing" && ( {status === "processing" && (
<AnimatedProcessingIcon className="text-current mr-2" size={10} /> <AnimatedProcessingIcon className="text-current h-3 w-3 shrink-0" />
)} )}
{config.label} {config.label}
</div> </div>

View file

@ -7,33 +7,18 @@ import {
useCallback, useCallback,
useContext, useContext,
useEffect, useEffect,
useRef,
useState, useState,
} from "react"; } from "react";
import { toast } from "sonner"; import { toast } from "sonner";
import { useCancelTaskMutation } from "@/app/api/mutations/useCancelTaskMutation";
import {
type Task,
useGetTasksQuery,
} from "@/app/api/queries/useGetTasksQuery";
import { useAuth } from "@/contexts/auth-context"; import { useAuth } from "@/contexts/auth-context";
export interface Task { // Task interface is now imported from useGetTasksQuery
task_id: string;
status:
| "pending"
| "running"
| "processing"
| "completed"
| "failed"
| "error";
total_files?: number;
processed_files?: number;
successful_files?: number;
failed_files?: number;
running_files?: number;
pending_files?: number;
created_at: string;
updated_at: string;
duration_seconds?: number;
result?: Record<string, unknown>;
error?: string;
files?: Record<string, Record<string, unknown>>;
}
export interface TaskFile { export interface TaskFile {
filename: string; filename: string;
@ -51,27 +36,54 @@ interface TaskContextType {
files: TaskFile[]; files: TaskFile[];
addTask: (taskId: string) => void; addTask: (taskId: string) => void;
addFiles: (files: Partial<TaskFile>[], taskId: string) => void; addFiles: (files: Partial<TaskFile>[], taskId: string) => void;
removeTask: (taskId: string) => void;
refreshTasks: () => Promise<void>; refreshTasks: () => Promise<void>;
cancelTask: (taskId: string) => Promise<void>; cancelTask: (taskId: string) => Promise<void>;
isPolling: boolean; isPolling: boolean;
isFetching: boolean; isFetching: boolean;
isMenuOpen: boolean; isMenuOpen: boolean;
toggleMenu: () => void; toggleMenu: () => void;
isRecentTasksExpanded: boolean;
setRecentTasksExpanded: (expanded: boolean) => void;
// React Query states
isLoading: boolean;
error: Error | null;
} }
const TaskContext = createContext<TaskContextType | undefined>(undefined); const TaskContext = createContext<TaskContextType | undefined>(undefined);
export function TaskProvider({ children }: { children: React.ReactNode }) { export function TaskProvider({ children }: { children: React.ReactNode }) {
const [tasks, setTasks] = useState<Task[]>([]);
const [files, setFiles] = useState<TaskFile[]>([]); const [files, setFiles] = useState<TaskFile[]>([]);
const [isPolling, setIsPolling] = useState(false);
const [isFetching, setIsFetching] = useState(false);
const [isMenuOpen, setIsMenuOpen] = useState(false); const [isMenuOpen, setIsMenuOpen] = useState(false);
const [isRecentTasksExpanded, setIsRecentTasksExpanded] = useState(false);
const previousTasksRef = useRef<Task[]>([]);
const { isAuthenticated, isNoAuthMode } = useAuth(); const { isAuthenticated, isNoAuthMode } = useAuth();
const queryClient = useQueryClient(); const queryClient = useQueryClient();
// Use React Query hooks
const {
data: tasks = [],
isLoading,
error,
refetch: refetchTasks,
isFetching,
} = useGetTasksQuery({
enabled: isAuthenticated || isNoAuthMode,
});
const cancelTaskMutation = useCancelTaskMutation({
onSuccess: () => {
toast.success("Task cancelled", {
description: "Task has been cancelled successfully",
});
},
onError: (error) => {
toast.error("Failed to cancel task", {
description: error.message,
});
},
});
const refetchSearch = useCallback(() => { const refetchSearch = useCallback(() => {
queryClient.invalidateQueries({ queryClient.invalidateQueries({
queryKey: ["search"], queryKey: ["search"],
@ -99,265 +111,216 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
[], [],
); );
const fetchTasks = useCallback(async () => { // Handle task status changes and file updates
if (!isAuthenticated && !isNoAuthMode) return; useEffect(() => {
if (tasks.length === 0) {
setIsFetching(true); // Store current tasks as previous for next comparison
try { previousTasksRef.current = tasks;
const response = await fetch("/api/tasks"); return;
if (response.ok) {
const data = await response.json();
const newTasks = data.tasks || [];
// Update tasks and check for status changes in the same state update
setTasks((prevTasks) => {
// Check for newly completed tasks to show toasts
if (prevTasks.length > 0) {
newTasks.forEach((newTask: Task) => {
const oldTask = prevTasks.find(
(t) => t.task_id === newTask.task_id,
);
// Update or add files from task.files if available
if (newTask.files && typeof newTask.files === "object") {
const taskFileEntries = Object.entries(newTask.files);
const now = new Date().toISOString();
taskFileEntries.forEach(([filePath, fileInfo]) => {
if (typeof fileInfo === "object" && fileInfo) {
const fileName = filePath.split("/").pop() || filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
let mappedStatus: TaskFile["status"];
switch (fileStatus) {
case "pending":
case "running":
mappedStatus = "processing";
break;
case "completed":
mappedStatus = "active";
break;
case "failed":
mappedStatus = "failed";
break;
default:
mappedStatus = "processing";
}
setFiles((prevFiles) => {
const existingFileIndex = prevFiles.findIndex(
(f) =>
f.source_url === filePath &&
f.task_id === newTask.task_id,
);
// Detect connector type based on file path or other indicators
let connectorType = "local";
if (filePath.includes("/") && !filePath.startsWith("/")) {
// Likely S3 key format (bucket/path/file.ext)
connectorType = "s3";
}
const fileEntry: TaskFile = {
filename: fileName,
mimetype: "", // We don't have this info from the task
source_url: filePath,
size: 0, // We don't have this info from the task
connector_type: connectorType,
status: mappedStatus,
task_id: newTask.task_id,
created_at:
typeof fileInfo.created_at === "string"
? fileInfo.created_at
: now,
updated_at:
typeof fileInfo.updated_at === "string"
? fileInfo.updated_at
: now,
};
if (existingFileIndex >= 0) {
// Update existing file
const updatedFiles = [...prevFiles];
updatedFiles[existingFileIndex] = fileEntry;
return updatedFiles;
} else {
// Add new file
return [...prevFiles, fileEntry];
}
});
}
});
}
if (
oldTask &&
oldTask.status !== "completed" &&
newTask.status === "completed"
) {
// Task just completed - show success toast
toast.success("Task completed successfully", {
description: `Task ${newTask.task_id} has finished processing.`,
action: {
label: "View",
onClick: () => console.log("View task", newTask.task_id),
},
});
refetchSearch();
// Dispatch knowledge updated event for all knowledge-related pages
console.log(
"Task completed successfully, dispatching knowledgeUpdated event",
);
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
// Remove files for this completed task from the files list
setFiles((prevFiles) =>
prevFiles.filter((file) => file.task_id !== newTask.task_id),
);
} else if (
oldTask &&
oldTask.status !== "failed" &&
oldTask.status !== "error" &&
(newTask.status === "failed" || newTask.status === "error")
) {
// Task just failed - show error toast
toast.error("Task failed", {
description: `Task ${newTask.task_id} failed: ${
newTask.error || "Unknown error"
}`,
});
// Files will be updated to failed status by the file parsing logic above
}
});
}
return newTasks;
});
}
} catch (error) {
console.error("Failed to fetch tasks:", error);
} finally {
setIsFetching(false);
} }
}, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop!
const addTask = useCallback((taskId: string) => { // Check for task status changes by comparing with previous tasks
// Immediately start aggressive polling for the new task tasks.forEach((currentTask) => {
let pollAttempts = 0; const previousTask = previousTasksRef.current.find(
const maxPollAttempts = 30; // Poll for up to 30 seconds (prev) => prev.task_id === currentTask.task_id,
);
const aggressivePoll = async () => { // Only show toasts if we have previous data and status has changed
try { if (
const response = await fetch("/api/tasks"); (previousTask && previousTask.status !== currentTask.status) ||
if (response.ok) { (!previousTask && previousTasksRef.current.length !== 0)
const data = await response.json(); ) {
const newTasks = data.tasks || []; // Process files from failed task and add them to files list
const foundTask = newTasks.find( if (currentTask.files && typeof currentTask.files === "object") {
(task: Task) => task.task_id === taskId, const taskFileEntries = Object.entries(currentTask.files);
); const now = new Date().toISOString();
if (foundTask) { taskFileEntries.forEach(([filePath, fileInfo]) => {
// Task found! Update the tasks state if (typeof fileInfo === "object" && fileInfo) {
setTasks((prevTasks) => { // Use the filename from backend if available, otherwise extract from path
// Check if task is already in the list const fileName =
const exists = prevTasks.some((t) => t.task_id === taskId); (fileInfo as any).filename ||
if (!exists) { filePath.split("/").pop() ||
return [...prevTasks, foundTask]; filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
let mappedStatus: TaskFile["status"];
switch (fileStatus) {
case "pending":
case "running":
mappedStatus = "processing";
break;
case "completed":
mappedStatus = "active";
break;
case "failed":
mappedStatus = "failed";
break;
default:
mappedStatus = "processing";
} }
// Update existing task
return prevTasks.map((t) => setFiles((prevFiles) => {
t.task_id === taskId ? foundTask : t, const existingFileIndex = prevFiles.findIndex(
); (f) =>
}); f.source_url === filePath &&
return; // Stop polling, we found it f.task_id === currentTask.task_id,
} );
// Detect connector type based on file path or other indicators
let connectorType = "local";
if (filePath.includes("/") && !filePath.startsWith("/")) {
// Likely S3 key format (bucket/path/file.ext)
connectorType = "s3";
}
const fileEntry: TaskFile = {
filename: fileName,
mimetype: "", // We don't have this info from the task
source_url: filePath,
size: 0, // We don't have this info from the task
connector_type: connectorType,
status: mappedStatus,
task_id: currentTask.task_id,
created_at:
typeof fileInfo.created_at === "string"
? fileInfo.created_at
: now,
updated_at:
typeof fileInfo.updated_at === "string"
? fileInfo.updated_at
: now,
};
if (existingFileIndex >= 0) {
// Update existing file
const updatedFiles = [...prevFiles];
updatedFiles[existingFileIndex] = fileEntry;
return updatedFiles;
} else {
// Add new file
return [...prevFiles, fileEntry];
}
});
}
});
} }
} catch (error) { if (
console.error("Aggressive polling failed:", error); previousTask &&
} previousTask.status !== "completed" &&
currentTask.status === "completed"
) {
// Task just completed - show success toast with file counts
const successfulFiles = currentTask.successful_files || 0;
const failedFiles = currentTask.failed_files || 0;
pollAttempts++; let description = "";
if (pollAttempts < maxPollAttempts) { if (failedFiles > 0) {
// Continue polling every 1 second for new tasks description = `${successfulFiles} file${
setTimeout(aggressivePoll, 1000); successfulFiles !== 1 ? "s" : ""
} } uploaded successfully, ${failedFiles} file${
}; failedFiles !== 1 ? "s" : ""
} failed`;
} else {
description = `${successfulFiles} file${
successfulFiles !== 1 ? "s" : ""
} uploaded successfully`;
}
// Start aggressive polling after a short delay to allow backend to process toast.success("Task completed", {
setTimeout(aggressivePoll, 500); description,
}, []); action: {
label: "View",
onClick: () => {
setIsMenuOpen(true);
setIsRecentTasksExpanded(true);
},
},
});
setTimeout(() => {
setFiles((prevFiles) =>
prevFiles.filter(
(file) =>
file.task_id !== currentTask.task_id ||
file.status === "failed",
),
);
refetchSearch();
}, 500);
} else if (
previousTask &&
previousTask.status !== "failed" &&
previousTask.status !== "error" &&
(currentTask.status === "failed" || currentTask.status === "error")
) {
// Task just failed - show error toast
toast.error("Task failed", {
description: `Task ${currentTask.task_id} failed: ${
currentTask.error || "Unknown error"
}`,
});
}
}
});
// Store current tasks as previous for next comparison
previousTasksRef.current = tasks;
}, [tasks, refetchSearch]);
const addTask = useCallback(
(_taskId: string) => {
// React Query will automatically handle polling when tasks are active
// Just trigger a refetch to get the latest data
setTimeout(() => {
refetchTasks();
}, 500);
},
[refetchTasks],
);
const refreshTasks = useCallback(async () => { const refreshTasks = useCallback(async () => {
await fetchTasks(); setFiles([]);
}, [fetchTasks]); await refetchTasks();
}, [refetchTasks]);
const removeTask = useCallback((taskId: string) => {
setTasks((prev) => prev.filter((task) => task.task_id !== taskId));
}, []);
const cancelTask = useCallback( const cancelTask = useCallback(
async (taskId: string) => { async (taskId: string) => {
try { cancelTaskMutation.mutate({ taskId });
const response = await fetch(`/api/tasks/${taskId}/cancel`, {
method: "POST",
});
if (response.ok) {
// Immediately refresh tasks to show the updated status
await fetchTasks();
toast.success("Task cancelled", {
description: `Task ${taskId.substring(0, 8)}... has been cancelled`,
});
} else {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.error || "Failed to cancel task");
}
} catch (error) {
console.error("Failed to cancel task:", error);
toast.error("Failed to cancel task", {
description: error instanceof Error ? error.message : "Unknown error",
});
}
}, },
[fetchTasks], [cancelTaskMutation],
); );
const toggleMenu = useCallback(() => { const toggleMenu = useCallback(() => {
setIsMenuOpen((prev) => !prev); setIsMenuOpen((prev) => !prev);
}, []); }, []);
// Periodic polling for task updates // Determine if we're polling based on React Query's refetch interval
useEffect(() => { const isPolling =
if (!isAuthenticated && !isNoAuthMode) return; isFetching &&
tasks.some(
setIsPolling(true); (task) =>
task.status === "pending" ||
// Initial fetch task.status === "running" ||
fetchTasks(); task.status === "processing",
);
// Set up polling interval - every 3 seconds (more responsive for active tasks)
const interval = setInterval(fetchTasks, 3000);
return () => {
clearInterval(interval);
setIsPolling(false);
};
}, [isAuthenticated, isNoAuthMode, fetchTasks]);
const value: TaskContextType = { const value: TaskContextType = {
tasks, tasks,
files, files,
addTask, addTask,
addFiles, addFiles,
removeTask,
refreshTasks, refreshTasks,
cancelTask, cancelTask,
isPolling, isPolling,
isFetching, isFetching,
isMenuOpen, isMenuOpen,
toggleMenu, toggleMenu,
isRecentTasksExpanded,
setRecentTasksExpanded: setIsRecentTasksExpanded,
isLoading,
error,
}; };
return <TaskContext.Provider value={value}>{children}</TaskContext.Provider>; return <TaskContext.Provider value={value}>{children}</TaskContext.Provider>;

View file

@ -6,14 +6,13 @@ from config.settings import INDEX_NAME
logger = get_logger(__name__) logger = get_logger(__name__)
async def delete_documents_by_filename(request: Request, document_service, session_manager): async def check_filename_exists(request: Request, document_service, session_manager):
"""Delete all documents with a specific filename""" """Check if a document with a specific filename already exists"""
data = await request.json() filename = request.query_params.get("filename")
filename = data.get("filename")
if not filename: if not filename:
return JSONResponse({"error": "filename is required"}, status_code=400) return JSONResponse({"error": "filename parameter is required"}, status_code=400)
user = request.state.user user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token) jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
@ -22,34 +21,79 @@ async def delete_documents_by_filename(request: Request, document_service, sessi
opensearch_client = session_manager.get_user_opensearch_client( opensearch_client = session_manager.get_user_opensearch_client(
user.user_id, jwt_token user.user_id, jwt_token
) )
# Search for any document with this exact filename
from utils.opensearch_queries import build_filename_search_body
search_body = build_filename_search_body(filename, size=1, source=["filename"])
logger.debug(f"Checking filename existence: {filename}")
response = await opensearch_client.search(
index=INDEX_NAME,
body=search_body
)
# Check if any hits were found
hits = response.get("hits", {}).get("hits", [])
exists = len(hits) > 0
logger.debug(f"Filename check result - exists: {exists}, hits: {len(hits)}")
return JSONResponse({
"exists": exists,
"filename": filename
}, status_code=200)
except Exception as e:
logger.error("Error checking filename existence", filename=filename, error=str(e))
error_str = str(e)
if "AuthenticationException" in error_str:
return JSONResponse({"error": "Access denied: insufficient permissions"}, status_code=403)
else:
return JSONResponse({"error": str(e)}, status_code=500)
async def delete_documents_by_filename(request: Request, document_service, session_manager):
"""Delete all documents with a specific filename"""
data = await request.json()
filename = data.get("filename")
if not filename:
return JSONResponse({"error": "filename is required"}, status_code=400)
user = request.state.user
jwt_token = session_manager.get_effective_jwt_token(user.user_id, request.state.jwt_token)
try:
# Get user's OpenSearch client
opensearch_client = session_manager.get_user_opensearch_client(
user.user_id, jwt_token
)
# Delete by query to remove all chunks of this document # Delete by query to remove all chunks of this document
delete_query = { from utils.opensearch_queries import build_filename_delete_body
"query": {
"bool": { delete_query = build_filename_delete_body(filename)
"must": [
{"term": {"filename": filename}} logger.debug(f"Deleting documents with filename: {filename}")
]
}
}
}
result = await opensearch_client.delete_by_query( result = await opensearch_client.delete_by_query(
index=INDEX_NAME, index=INDEX_NAME,
body=delete_query, body=delete_query,
conflicts="proceed" conflicts="proceed"
) )
deleted_count = result.get("deleted", 0) deleted_count = result.get("deleted", 0)
logger.info(f"Deleted {deleted_count} chunks for filename {filename}", user_id=user.user_id) logger.info(f"Deleted {deleted_count} chunks for filename {filename}", user_id=user.user_id)
return JSONResponse({ return JSONResponse({
"success": True, "success": True,
"deleted_chunks": deleted_count, "deleted_chunks": deleted_count,
"filename": filename, "filename": filename,
"message": f"All documents with filename '{filename}' deleted successfully" "message": f"All documents with filename '{filename}' deleted successfully"
}, status_code=200) }, status_code=200)
except Exception as e: except Exception as e:
logger.error("Error deleting documents by filename", filename=filename, error=str(e)) logger.error("Error deleting documents by filename", filename=filename, error=str(e))
error_str = str(e) error_str = str(e)

View file

@ -189,19 +189,20 @@ async def upload_and_ingest_user_file(
# Create temporary file for task processing # Create temporary file for task processing
import tempfile import tempfile
import os import os
# Read file content # Read file content
content = await upload_file.read() content = await upload_file.read()
# Create temporary file # Create temporary file with the actual filename (not a temp prefix)
# Store in temp directory but use the real filename
temp_dir = tempfile.gettempdir()
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
temp_fd, temp_path = tempfile.mkstemp( temp_path = os.path.join(temp_dir, safe_filename)
suffix=f"_{safe_filename}"
)
try: try:
# Write content to temp file # Write content to temp file
with os.fdopen(temp_fd, 'wb') as temp_file: with open(temp_path, 'wb') as temp_file:
temp_file.write(content) temp_file.write(content)
logger.debug("Created temporary file for task processing", temp_path=temp_path) logger.debug("Created temporary file for task processing", temp_path=temp_path)

View file

@ -13,27 +13,27 @@ logger = get_logger(__name__)
async def upload_ingest_router( async def upload_ingest_router(
request: Request, request: Request,
document_service=None, document_service=None,
langflow_file_service=None, langflow_file_service=None,
session_manager=None, session_manager=None,
task_service=None task_service=None,
): ):
""" """
Router endpoint that automatically routes upload requests based on configuration. Router endpoint that automatically routes upload requests based on configuration.
- If DISABLE_INGEST_WITH_LANGFLOW is True: uses traditional OpenRAG upload (/upload) - If DISABLE_INGEST_WITH_LANGFLOW is True: uses traditional OpenRAG upload (/upload)
- If DISABLE_INGEST_WITH_LANGFLOW is False (default): uses Langflow upload-ingest via task service - If DISABLE_INGEST_WITH_LANGFLOW is False (default): uses Langflow upload-ingest via task service
This provides a single endpoint that users can call regardless of backend configuration. This provides a single endpoint that users can call regardless of backend configuration.
All langflow uploads are processed as background tasks for better scalability. All langflow uploads are processed as background tasks for better scalability.
""" """
try: try:
logger.debug( logger.debug(
"Router upload_ingest endpoint called", "Router upload_ingest endpoint called",
disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW,
) )
# Route based on configuration # Route based on configuration
if DISABLE_INGEST_WITH_LANGFLOW: if DISABLE_INGEST_WITH_LANGFLOW:
# Route to traditional OpenRAG upload # Route to traditional OpenRAG upload
@ -42,8 +42,10 @@ async def upload_ingest_router(
else: else:
# Route to Langflow upload and ingest using task service # Route to Langflow upload and ingest using task service
logger.debug("Routing to Langflow upload-ingest pipeline via task service") logger.debug("Routing to Langflow upload-ingest pipeline via task service")
return await langflow_upload_ingest_task(request, langflow_file_service, session_manager, task_service) return await langflow_upload_ingest_task(
request, langflow_file_service, session_manager, task_service
)
except Exception as e: except Exception as e:
logger.error("Error in upload_ingest_router", error=str(e)) logger.error("Error in upload_ingest_router", error=str(e))
error_msg = str(e) error_msg = str(e)
@ -57,17 +59,14 @@ async def upload_ingest_router(
async def langflow_upload_ingest_task( async def langflow_upload_ingest_task(
request: Request, request: Request, langflow_file_service, session_manager, task_service
langflow_file_service,
session_manager,
task_service
): ):
"""Task-based langflow upload and ingest for single/multiple files""" """Task-based langflow upload and ingest for single/multiple files"""
try: try:
logger.debug("Task-based langflow upload_ingest endpoint called") logger.debug("Task-based langflow upload_ingest endpoint called")
form = await request.form() form = await request.form()
upload_files = form.getlist("file") upload_files = form.getlist("file")
if not upload_files or len(upload_files) == 0: if not upload_files or len(upload_files) == 0:
logger.error("No files provided in task-based upload request") logger.error("No files provided in task-based upload request")
return JSONResponse({"error": "Missing files"}, status_code=400) return JSONResponse({"error": "Missing files"}, status_code=400)
@ -77,14 +76,16 @@ async def langflow_upload_ingest_task(
settings_json = form.get("settings") settings_json = form.get("settings")
tweaks_json = form.get("tweaks") tweaks_json = form.get("tweaks")
delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true" delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true"
replace_duplicates = form.get("replace_duplicates", "false").lower() == "true"
# Parse JSON fields if provided # Parse JSON fields if provided
settings = None settings = None
tweaks = None tweaks = None
if settings_json: if settings_json:
try: try:
import json import json
settings = json.loads(settings_json) settings = json.loads(settings_json)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.error("Invalid settings JSON", error=str(e)) logger.error("Invalid settings JSON", error=str(e))
@ -93,6 +94,7 @@ async def langflow_upload_ingest_task(
if tweaks_json: if tweaks_json:
try: try:
import json import json
tweaks = json.loads(tweaks_json) tweaks = json.loads(tweaks_json)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.error("Invalid tweaks JSON", error=str(e)) logger.error("Invalid tweaks JSON", error=str(e))
@ -106,28 +108,37 @@ async def langflow_upload_ingest_task(
jwt_token = getattr(request.state, "jwt_token", None) jwt_token = getattr(request.state, "jwt_token", None)
if not user_id: if not user_id:
return JSONResponse({"error": "User authentication required"}, status_code=401) return JSONResponse(
{"error": "User authentication required"}, status_code=401
)
# Create temporary files for task processing # Create temporary files for task processing
import tempfile import tempfile
import os import os
temp_file_paths = [] temp_file_paths = []
original_filenames = []
try: try:
# Create temp directory reference once
temp_dir = tempfile.gettempdir()
for upload_file in upload_files: for upload_file in upload_files:
# Read file content # Read file content
content = await upload_file.read() content = await upload_file.read()
# Create temporary file # Store ORIGINAL filename (not transformed)
original_filenames.append(upload_file.filename)
# Create temporary file with TRANSFORMED filename for filesystem safety
# Transform: spaces and / to underscore
safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_") safe_filename = upload_file.filename.replace(" ", "_").replace("/", "_")
temp_fd, temp_path = tempfile.mkstemp( temp_path = os.path.join(temp_dir, safe_filename)
suffix=f"_{safe_filename}"
)
# Write content to temp file # Write content to temp file
with os.fdopen(temp_fd, 'wb') as temp_file: with open(temp_path, "wb") as temp_file:
temp_file.write(content) temp_file.write(content)
temp_file_paths.append(temp_path) temp_file_paths.append(temp_path)
logger.debug( logger.debug(
@ -136,21 +147,22 @@ async def langflow_upload_ingest_task(
user_id=user_id, user_id=user_id,
has_settings=bool(settings), has_settings=bool(settings),
has_tweaks=bool(tweaks), has_tweaks=bool(tweaks),
delete_after_ingest=delete_after_ingest delete_after_ingest=delete_after_ingest,
) )
# Create langflow upload task # Create langflow upload task
print(f"tweaks: {tweaks}") logger.debug(
print(f"settings: {settings}") f"Preparing to create langflow upload task: tweaks={tweaks}, settings={settings}, jwt_token={jwt_token}, user_name={user_name}, user_email={user_email}, session_id={session_id}, delete_after_ingest={delete_after_ingest}, temp_file_paths={temp_file_paths}",
print(f"jwt_token: {jwt_token}") )
print(f"user_name: {user_name}") # Create a map between temp_file_paths and original_filenames
print(f"user_email: {user_email}") file_path_to_original_filename = dict(zip(temp_file_paths, original_filenames))
print(f"session_id: {session_id}") logger.debug(
print(f"delete_after_ingest: {delete_after_ingest}") f"File path to original filename map: {file_path_to_original_filename}",
print(f"temp_file_paths: {temp_file_paths}") )
task_id = await task_service.create_langflow_upload_task( task_id = await task_service.create_langflow_upload_task(
user_id=user_id, user_id=user_id,
file_paths=temp_file_paths, file_paths=temp_file_paths,
original_filenames=file_path_to_original_filename,
langflow_file_service=langflow_file_service, langflow_file_service=langflow_file_service,
session_manager=session_manager, session_manager=session_manager,
jwt_token=jwt_token, jwt_token=jwt_token,
@ -160,23 +172,28 @@ async def langflow_upload_ingest_task(
tweaks=tweaks, tweaks=tweaks,
settings=settings, settings=settings,
delete_after_ingest=delete_after_ingest, delete_after_ingest=delete_after_ingest,
replace_duplicates=replace_duplicates,
) )
logger.debug("Langflow upload task created successfully", task_id=task_id) logger.debug("Langflow upload task created successfully", task_id=task_id)
return JSONResponse({ return JSONResponse(
"task_id": task_id, {
"message": f"Langflow upload task created for {len(upload_files)} file(s)", "task_id": task_id,
"file_count": len(upload_files) "message": f"Langflow upload task created for {len(upload_files)} file(s)",
}, status_code=202) # 202 Accepted for async processing "file_count": len(upload_files),
},
status_code=202,
) # 202 Accepted for async processing
except Exception: except Exception:
# Clean up temp files on error # Clean up temp files on error
from utils.file_utils import safe_unlink from utils.file_utils import safe_unlink
for temp_path in temp_file_paths: for temp_path in temp_file_paths:
safe_unlink(temp_path) safe_unlink(temp_path)
raise raise
except Exception as e: except Exception as e:
logger.error( logger.error(
"Task-based langflow upload_ingest endpoint failed", "Task-based langflow upload_ingest endpoint failed",
@ -184,5 +201,6 @@ async def langflow_upload_ingest_task(
error=str(e), error=str(e),
) )
import traceback import traceback
logger.error("Full traceback", traceback=traceback.format_exc()) logger.error("Full traceback", traceback=traceback.format_exc())
return JSONResponse({"error": str(e)}, status_code=500) return JSONResponse({"error": str(e)}, status_code=500)

View file

@ -953,6 +953,17 @@ async def create_app():
methods=["POST", "GET"], methods=["POST", "GET"],
), ),
# Document endpoints # Document endpoints
Route(
"/documents/check-filename",
require_auth(services["session_manager"])(
partial(
documents.check_filename_exists,
document_service=services["document_service"],
session_manager=services["session_manager"],
)
),
methods=["GET"],
),
Route( Route(
"/documents/delete-by-filename", "/documents/delete-by-filename",
require_auth(services["session_manager"])( require_auth(services["session_manager"])(

View file

@ -55,6 +55,96 @@ class TaskProcessor:
await asyncio.sleep(retry_delay) await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff retry_delay *= 2 # Exponential backoff
async def check_filename_exists(
self,
filename: str,
opensearch_client,
) -> bool:
"""
Check if a document with the given filename already exists in OpenSearch.
Returns True if any chunks with this filename exist.
"""
from config.settings import INDEX_NAME
from utils.opensearch_queries import build_filename_search_body
import asyncio
max_retries = 3
retry_delay = 1.0
for attempt in range(max_retries):
try:
# Search for any document with this exact filename
search_body = build_filename_search_body(filename, size=1, source=False)
response = await opensearch_client.search(
index=INDEX_NAME,
body=search_body
)
# Check if any hits were found
hits = response.get("hits", {}).get("hits", [])
return len(hits) > 0
except (asyncio.TimeoutError, Exception) as e:
if attempt == max_retries - 1:
logger.error(
"OpenSearch filename check failed after retries",
filename=filename,
error=str(e),
attempt=attempt + 1
)
# On final failure, assume document doesn't exist (safer to reprocess than skip)
logger.warning(
"Assuming filename doesn't exist due to connection issues",
filename=filename
)
return False
else:
logger.warning(
"OpenSearch filename check failed, retrying",
filename=filename,
error=str(e),
attempt=attempt + 1,
retry_in=retry_delay
)
await asyncio.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
async def delete_document_by_filename(
self,
filename: str,
opensearch_client,
) -> None:
"""
Delete all chunks of a document with the given filename from OpenSearch.
"""
from config.settings import INDEX_NAME
from utils.opensearch_queries import build_filename_delete_body
try:
# Delete all documents with this filename
delete_body = build_filename_delete_body(filename)
response = await opensearch_client.delete_by_query(
index=INDEX_NAME,
body=delete_body
)
deleted_count = response.get("deleted", 0)
logger.info(
"Deleted existing document chunks",
filename=filename,
deleted_count=deleted_count
)
except Exception as e:
logger.error(
"Failed to delete existing document",
filename=filename,
error=str(e)
)
raise
async def process_document_standard( async def process_document_standard(
self, self,
file_path: str, file_path: str,
@ -527,6 +617,7 @@ class LangflowFileProcessor(TaskProcessor):
tweaks: dict = None, tweaks: dict = None,
settings: dict = None, settings: dict = None,
delete_after_ingest: bool = True, delete_after_ingest: bool = True,
replace_duplicates: bool = False,
): ):
super().__init__() super().__init__()
self.langflow_file_service = langflow_file_service self.langflow_file_service = langflow_file_service
@ -539,6 +630,7 @@ class LangflowFileProcessor(TaskProcessor):
self.tweaks = tweaks or {} self.tweaks = tweaks or {}
self.settings = settings self.settings = settings
self.delete_after_ingest = delete_after_ingest self.delete_after_ingest = delete_after_ingest
self.replace_duplicates = replace_duplicates
async def process_item( async def process_item(
self, upload_task: UploadTask, item: str, file_task: FileTask self, upload_task: UploadTask, item: str, file_task: FileTask
@ -554,37 +646,40 @@ class LangflowFileProcessor(TaskProcessor):
file_task.updated_at = time.time() file_task.updated_at = time.time()
try: try:
# Compute hash and check if already exists # Use the ORIGINAL filename stored in file_task (not the transformed temp path)
from utils.hash_utils import hash_id # This ensures we check/store the original filename with spaces, etc.
file_hash = hash_id(item) original_filename = file_task.filename or os.path.basename(item)
# Check if document already exists # Check if document with same filename already exists
opensearch_client = self.session_manager.get_user_opensearch_client( opensearch_client = self.session_manager.get_user_opensearch_client(
self.owner_user_id, self.jwt_token self.owner_user_id, self.jwt_token
) )
if await self.check_document_exists(file_hash, opensearch_client):
file_task.status = TaskStatus.COMPLETED filename_exists = await self.check_filename_exists(original_filename, opensearch_client)
file_task.result = {"status": "unchanged", "id": file_hash}
if filename_exists and not self.replace_duplicates:
# Duplicate exists and user hasn't confirmed replacement
file_task.status = TaskStatus.FAILED
file_task.error = f"File with name '{original_filename}' already exists"
file_task.updated_at = time.time() file_task.updated_at = time.time()
upload_task.successful_files += 1 upload_task.failed_files += 1
return return
elif filename_exists and self.replace_duplicates:
# Delete existing document before uploading new one
logger.info(f"Replacing existing document: {original_filename}")
await self.delete_document_by_filename(original_filename, opensearch_client)
# Read file content for processing # Read file content for processing
with open(item, 'rb') as f: with open(item, 'rb') as f:
content = f.read() content = f.read()
# Create file tuple for upload # Create file tuple for upload using ORIGINAL filename
temp_filename = os.path.basename(item) # This ensures the document is indexed with the original name
# Extract original filename from temp file suffix (remove tmp prefix) content_type, _ = mimetypes.guess_type(original_filename)
if "_" in temp_filename:
filename = temp_filename.split("_", 1)[1] # Get everything after first _
else:
filename = temp_filename
content_type, _ = mimetypes.guess_type(filename)
if not content_type: if not content_type:
content_type = 'application/octet-stream' content_type = 'application/octet-stream'
file_tuple = (filename, content, content_type) file_tuple = (original_filename, content, content_type)
# Get JWT token using same logic as DocumentFileProcessor # Get JWT token using same logic as DocumentFileProcessor
# This will handle anonymous JWT creation if needed # This will handle anonymous JWT creation if needed

View file

@ -20,7 +20,8 @@ class FileTask:
retry_count: int = 0 retry_count: int = 0
created_at: float = field(default_factory=time.time) created_at: float = field(default_factory=time.time)
updated_at: float = field(default_factory=time.time) updated_at: float = field(default_factory=time.time)
filename: Optional[str] = None # Original filename for display
@property @property
def duration_seconds(self) -> float: def duration_seconds(self) -> float:
"""Duration in seconds from creation to last update""" """Duration in seconds from creation to last update"""

View file

@ -1,6 +1,5 @@
import asyncio import asyncio
import random import random
from typing import Dict, Optional
import time import time
import uuid import uuid
@ -59,6 +58,7 @@ class TaskService:
file_paths: list, file_paths: list,
langflow_file_service, langflow_file_service,
session_manager, session_manager,
original_filenames: dict | None = None,
jwt_token: str = None, jwt_token: str = None,
owner_name: str = None, owner_name: str = None,
owner_email: str = None, owner_email: str = None,
@ -66,6 +66,7 @@ class TaskService:
tweaks: dict = None, tweaks: dict = None,
settings: dict = None, settings: dict = None,
delete_after_ingest: bool = True, delete_after_ingest: bool = True,
replace_duplicates: bool = False,
) -> str: ) -> str:
"""Create a new upload task for Langflow file processing with upload and ingest""" """Create a new upload task for Langflow file processing with upload and ingest"""
# Use LangflowFileProcessor with user context # Use LangflowFileProcessor with user context
@ -82,18 +83,35 @@ class TaskService:
tweaks=tweaks, tweaks=tweaks,
settings=settings, settings=settings,
delete_after_ingest=delete_after_ingest, delete_after_ingest=delete_after_ingest,
replace_duplicates=replace_duplicates,
) )
return await self.create_custom_task(user_id, file_paths, processor) return await self.create_custom_task(user_id, file_paths, processor, original_filenames)
async def create_custom_task(self, user_id: str, items: list, processor) -> str: async def create_custom_task(self, user_id: str, items: list, processor, original_filenames: dict | None = None) -> str:
"""Create a new task with custom processor for any type of items""" """Create a new task with custom processor for any type of items"""
import os
# Store anonymous tasks under a stable key so they can be retrieved later # Store anonymous tasks under a stable key so they can be retrieved later
store_user_id = user_id or AnonymousUser().user_id store_user_id = user_id or AnonymousUser().user_id
task_id = str(uuid.uuid4()) task_id = str(uuid.uuid4())
# Create file tasks with original filenames if provided
normalized_originals = (
{str(k): v for k, v in original_filenames.items()} if original_filenames else {}
)
file_tasks = {
str(item): FileTask(
file_path=str(item),
filename=normalized_originals.get(
str(item), os.path.basename(str(item))
),
)
for item in items
}
upload_task = UploadTask( upload_task = UploadTask(
task_id=task_id, task_id=task_id,
total_files=len(items), total_files=len(items),
file_tasks={str(item): FileTask(file_path=str(item)) for item in items}, file_tasks=file_tasks,
) )
# Attach the custom processor to the task # Attach the custom processor to the task
@ -268,6 +286,7 @@ class TaskService:
"created_at": file_task.created_at, "created_at": file_task.created_at,
"updated_at": file_task.updated_at, "updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds, "duration_seconds": file_task.duration_seconds,
"filename": file_task.filename,
} }
# Count running and pending files # Count running and pending files
@ -322,6 +341,7 @@ class TaskService:
"created_at": file_task.created_at, "created_at": file_task.created_at,
"updated_at": file_task.updated_at, "updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds, "duration_seconds": file_task.duration_seconds,
"filename": file_task.filename,
} }
if file_task.status.value == "running": if file_task.status.value == "running":

View file

@ -31,10 +31,14 @@ class DoclingManager:
self._process: Optional[subprocess.Popen] = None self._process: Optional[subprocess.Popen] = None
self._port = 5001 self._port = 5001
self._host = "127.0.0.1" self._host = self._get_host_for_containers() # Get appropriate host IP based on runtime
self._running = False self._running = False
self._external_process = False self._external_process = False
# PID file to track docling-serve across sessions (in current working directory)
from pathlib import Path
self._pid_file = Path.cwd() / ".docling.pid"
# Log storage - simplified, no queue # Log storage - simplified, no queue
self._log_buffer: List[str] = [] self._log_buffer: List[str] = []
self._max_log_lines = 1000 self._max_log_lines = 1000
@ -42,22 +46,198 @@ class DoclingManager:
self._initialized = True self._initialized = True
def cleanup(self): # Try to recover existing process from PID file
"""Cleanup resources and stop any running processes.""" self._recover_from_pid_file()
if self._process and self._process.poll() is None:
self._add_log_entry("Cleaning up docling-serve process on exit")
try:
self._process.terminate()
self._process.wait(timeout=5)
except subprocess.TimeoutExpired:
self._process.kill()
self._process.wait()
except Exception as e:
self._add_log_entry(f"Error during cleanup: {e}")
self._running = False def _get_host_for_containers(self) -> str:
self._process = None """
Return a host IP that containers can reach (a bridge/CNI gateway).
Prefers Docker/Podman network gateways; falls back to bridge interfaces.
"""
import subprocess, json, shutil, re, logging
logger = logging.getLogger(__name__)
def run(cmd, timeout=2, text=True):
return subprocess.run(cmd, capture_output=True, text=text, timeout=timeout)
gateways = []
compose_gateways = [] # Highest priority - compose project networks
active_gateways = [] # Medium priority - networks with containers
# ---- Docker: enumerate networks and collect gateways
if shutil.which("docker"):
try:
ls = run(["docker", "network", "ls", "--format", "{{.Name}}"])
if ls.returncode == 0:
for name in filter(None, ls.stdout.splitlines()):
try:
insp = run(["docker", "network", "inspect", name, "--format", "{{json .}}"])
if insp.returncode == 0 and insp.stdout.strip():
nw = json.loads(insp.stdout)[0] if insp.stdout.strip().startswith("[") else json.loads(insp.stdout)
ipam = nw.get("IPAM", {})
containers = nw.get("Containers", {})
for cfg in ipam.get("Config", []) or []:
gw = cfg.get("Gateway")
if gw:
# Highest priority: compose networks (ending in _default)
if name.endswith("_default"):
compose_gateways.append(gw)
# Medium priority: networks with active containers
elif len(containers) > 0:
active_gateways.append(gw)
# Low priority: empty networks
else:
gateways.append(gw)
except Exception:
pass
except Exception:
pass
# ---- Podman: enumerate networks and collect gateways (netavark)
if shutil.which("podman"):
try:
# modern podman supports JSON format
ls = run(["podman", "network", "ls", "--format", "json"])
if ls.returncode == 0 and ls.stdout.strip():
for net in json.loads(ls.stdout):
name = net.get("name") or net.get("Name")
if not name:
continue
try:
insp = run(["podman", "network", "inspect", name, "--format", "json"])
if insp.returncode == 0 and insp.stdout.strip():
arr = json.loads(insp.stdout)
for item in (arr if isinstance(arr, list) else [arr]):
for sn in item.get("subnets", []) or []:
gw = sn.get("gateway")
if gw:
# Prioritize compose/project networks
if name.endswith("_default") or "_" in name:
compose_gateways.append(gw)
else:
gateways.append(gw)
except Exception:
pass
except Exception:
pass
# ---- Fallback: parse host interfaces for common bridges
if not gateways:
try:
if shutil.which("ip"):
show = run(["ip", "-o", "-4", "addr", "show"])
if show.returncode == 0:
for line in show.stdout.splitlines():
# e.g. "12: br-3f0f... inet 172.18.0.1/16 ..."
m = re.search(r"^\d+:\s+([a-zA-Z0-9_.:-]+)\s+.*\binet\s+(\d+\.\d+\.\d+\.\d+)/", line)
if not m:
continue
ifname, ip = m.group(1), m.group(2)
if ifname == "docker0" or ifname.startswith(("br-", "cni")):
gateways.append(ip)
else:
# As a last resort, try net-tools ifconfig output
if shutil.which("ifconfig"):
show = run(["ifconfig"])
for block in show.stdout.split("\n\n"):
if any(block.strip().startswith(n) for n in ("docker0", "cni", "br-")):
m = re.search(r"inet (?:addr:)?(\d+\.\d+\.\d+\.\d+)", block)
if m:
gateways.append(m.group(1))
except Exception:
pass
# Dedup, prioritizing: 1) compose networks, 2) active networks, 3) all others
seen, uniq = set(), []
# First: compose project networks (_default suffix)
for ip in compose_gateways:
if ip not in seen:
uniq.append(ip)
seen.add(ip)
# Second: networks with active containers
for ip in active_gateways:
if ip not in seen:
uniq.append(ip)
seen.add(ip)
# Third: all other gateways
for ip in gateways:
if ip not in seen:
uniq.append(ip)
seen.add(ip)
if uniq:
if len(uniq) > 1:
logger.info("Container-reachable host IP candidates: %s", ", ".join(uniq))
else:
logger.info("Container-reachable host IP: %s", uniq[0])
return uniq[0]
# Nothing found: warn clearly
logger.warning(
"No container bridge IP found. If using rootless Podman (slirp4netns), there is no host bridge; publish ports or use 10.0.2.2 from the container."
)
# Returning localhost is honest only for same-namespace; keep it explicit:
return "127.0.0.1"
def cleanup(self):
"""Cleanup resources but keep docling-serve running across sessions."""
# Don't stop the process on exit - let it persist
# Just clean up our references
self._add_log_entry("TUI exiting - docling-serve will continue running")
# Note: We keep the PID file so we can reconnect in future sessions
def _save_pid(self, pid: int) -> None:
"""Save the process PID to a file for persistence across sessions."""
try:
self._pid_file.write_text(str(pid))
self._add_log_entry(f"Saved PID {pid} to {self._pid_file}")
except Exception as e:
self._add_log_entry(f"Failed to save PID file: {e}")
def _load_pid(self) -> Optional[int]:
"""Load the process PID from file."""
try:
if self._pid_file.exists():
pid_str = self._pid_file.read_text().strip()
if pid_str.isdigit():
return int(pid_str)
except Exception as e:
self._add_log_entry(f"Failed to load PID file: {e}")
return None
def _clear_pid_file(self) -> None:
"""Remove the PID file."""
try:
if self._pid_file.exists():
self._pid_file.unlink()
self._add_log_entry("Cleared PID file")
except Exception as e:
self._add_log_entry(f"Failed to clear PID file: {e}")
def _is_process_running(self, pid: int) -> bool:
"""Check if a process with the given PID is running."""
try:
# Send signal 0 to check if process exists (doesn't actually send a signal)
os.kill(pid, 0)
return True
except OSError:
return False
def _recover_from_pid_file(self) -> None:
"""Try to recover connection to existing docling-serve process from PID file."""
pid = self._load_pid()
if pid is not None:
if self._is_process_running(pid):
self._add_log_entry(f"Recovered existing docling-serve process (PID: {pid})")
# Mark as external process since we didn't start it in this session
self._external_process = True
self._running = True
# Note: We don't have a Popen object for this process, but that's OK
# We'll detect it's running via port check
else:
self._add_log_entry(f"Stale PID file found (PID: {pid} not running)")
self._clear_pid_file()
def _add_log_entry(self, message: str) -> None: def _add_log_entry(self, message: str) -> None:
"""Add a log entry to the buffer (thread-safe).""" """Add a log entry to the buffer (thread-safe)."""
timestamp = time.strftime("%Y-%m-%d %H:%M:%S") timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
@ -70,43 +250,35 @@ class DoclingManager:
self._log_buffer = self._log_buffer[-self._max_log_lines:] self._log_buffer = self._log_buffer[-self._max_log_lines:]
def is_running(self) -> bool: def is_running(self) -> bool:
"""Check if docling serve is running.""" """Check if docling serve is running (by PID only)."""
# First check our internal state # Check if we have a direct process handle
internal_running = self._running and self._process is not None and self._process.poll() is None if self._process is not None and self._process.poll() is None:
self._running = True
# If we think it's not running, check if something is listening on the port
# This handles cases where docling-serve was started outside the TUI
if not internal_running:
try:
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(0.5)
result = s.connect_ex((self._host, self._port))
s.close()
# If port is in use, something is running there
if result == 0:
# Only log this once when we first detect external process
if not self._external_process:
self._add_log_entry(f"Detected external docling-serve running on {self._host}:{self._port}")
# Set a flag to indicate this is an external process
self._external_process = True
return True
except Exception as e:
# Only log errors occasionally to avoid spam
if not hasattr(self, '_last_port_error') or self._last_port_error != str(e):
self._add_log_entry(f"Error checking port: {e}")
self._last_port_error = str(e)
else:
# If we started it, it's not external
self._external_process = False self._external_process = False
return True
return internal_running # Check if we have a PID from file
pid = self._load_pid()
if pid is not None and self._is_process_running(pid):
self._running = True
self._external_process = True
return True
# No running process found
self._running = False
self._external_process = False
return False
def get_status(self) -> Dict[str, Any]: def get_status(self) -> Dict[str, Any]:
"""Get current status of docling serve.""" """Get current status of docling serve."""
if self.is_running(): if self.is_running():
pid = self._process.pid if self._process else None # Try to get PID from process handle first, then from PID file
pid = None
if self._process:
pid = self._process.pid
else:
pid = self._load_pid()
return { return {
"status": "running", "status": "running",
"port": self._port, "port": self._port,
@ -127,13 +299,28 @@ class DoclingManager:
"pid": None "pid": None
} }
async def start(self, port: int = 5001, host: str = "127.0.0.1", enable_ui: bool = False) -> Tuple[bool, str]: async def start(self, port: int = 5001, host: Optional[str] = None, enable_ui: bool = False) -> Tuple[bool, str]:
"""Start docling serve as external process.""" """Start docling serve as external process."""
if self.is_running(): if self.is_running():
return False, "Docling serve is already running" return False, "Docling serve is already running"
self._port = port self._port = port
self._host = host # Use provided host or the bridge IP we detected in __init__
if host is not None:
self._host = host
# else: keep self._host as already set in __init__
# Check if port is already in use before trying to start
import socket
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(0.5)
result = s.connect_ex((self._host, self._port))
s.close()
if result == 0:
return False, f"Port {self._port} on {self._host} is already in use by another process. Please stop it first."
except Exception as e:
self._add_log_entry(f"Error checking port availability: {e}")
# Clear log buffer when starting # Clear log buffer when starting
self._log_buffer = [] self._log_buffer = []
@ -146,14 +333,14 @@ class DoclingManager:
if shutil.which("uv") and (os.path.exists("pyproject.toml") or os.getenv("VIRTUAL_ENV")): if shutil.which("uv") and (os.path.exists("pyproject.toml") or os.getenv("VIRTUAL_ENV")):
cmd = [ cmd = [
"uv", "run", "python", "-m", "docling_serve", "run", "uv", "run", "python", "-m", "docling_serve", "run",
"--host", host, "--host", self._host,
"--port", str(port), "--port", str(self._port),
] ]
else: else:
cmd = [ cmd = [
sys.executable, "-m", "docling_serve", "run", sys.executable, "-m", "docling_serve", "run",
"--host", host, "--host", self._host,
"--port", str(port), "--port", str(self._port),
] ]
if enable_ui: if enable_ui:
@ -173,6 +360,9 @@ class DoclingManager:
self._running = True self._running = True
self._add_log_entry("External process started") self._add_log_entry("External process started")
# Save the PID to file for persistence
self._save_pid(self._process.pid)
# Start a thread to capture output # Start a thread to capture output
self._start_output_capture() self._start_output_capture()
@ -192,11 +382,11 @@ class DoclingManager:
import socket import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(0.5) s.settimeout(0.5)
result = s.connect_ex((host, port)) result = s.connect_ex((self._host, self._port))
s.close() s.close()
if result == 0: if result == 0:
self._add_log_entry(f"Docling-serve is now listening on {host}:{port}") self._add_log_entry(f"Docling-serve is now listening on {self._host}:{self._port}")
break break
except: except:
pass pass
@ -298,9 +488,12 @@ class DoclingManager:
try: try:
self._add_log_entry("Stopping docling-serve process") self._add_log_entry("Stopping docling-serve process")
pid_to_stop = None
if self._process: if self._process:
# We started this process, so we can stop it directly # We have a direct process handle
self._add_log_entry(f"Terminating our process (PID: {self._process.pid})") pid_to_stop = self._process.pid
self._add_log_entry(f"Terminating our process (PID: {pid_to_stop})")
self._process.terminate() self._process.terminate()
# Wait for it to stop # Wait for it to stop
@ -315,16 +508,32 @@ class DoclingManager:
self._add_log_entry("Process force killed") self._add_log_entry("Process force killed")
elif self._external_process: elif self._external_process:
# This is an external process, we can't stop it directly # This is a process we recovered from PID file
self._add_log_entry("Cannot stop external docling-serve process - it was started outside the TUI") pid_to_stop = self._load_pid()
self._running = False if pid_to_stop and self._is_process_running(pid_to_stop):
self._external_process = False self._add_log_entry(f"Stopping process from PID file (PID: {pid_to_stop})")
return False, "Cannot stop external docling-serve process. Please stop it manually." try:
os.kill(pid_to_stop, 15) # SIGTERM
# Wait a bit for graceful shutdown
await asyncio.sleep(2)
if self._is_process_running(pid_to_stop):
# Still running, force kill
self._add_log_entry(f"Force killing process (PID: {pid_to_stop})")
os.kill(pid_to_stop, 9) # SIGKILL
except Exception as e:
self._add_log_entry(f"Error stopping external process: {e}")
return False, f"Error stopping external process: {str(e)}"
else:
self._add_log_entry("External process not found")
return False, "Process not found"
self._running = False self._running = False
self._process = None self._process = None
self._external_process = False self._external_process = False
# Clear the PID file since we intentionally stopped the service
self._clear_pid_file()
self._add_log_entry("Docling serve stopped successfully") self._add_log_entry("Docling serve stopped successfully")
return True, "Docling serve stopped successfully" return True, "Docling serve stopped successfully"

View file

@ -118,9 +118,16 @@ class WelcomeScreen(Screen):
welcome_text.append(ascii_art, style="bold white") welcome_text.append(ascii_art, style="bold white")
welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim") welcome_text.append("Terminal User Interface for OpenRAG\n\n", style="dim")
if self.services_running: # Check if all services are running
all_services_running = self.services_running and self.docling_running
if all_services_running:
welcome_text.append( welcome_text.append(
"✓ Services are currently running\n\n", style="bold green" "✓ All services are running\n\n", style="bold green"
)
elif self.services_running or self.docling_running:
welcome_text.append(
"⚠ Some services are running\n\n", style="bold yellow"
) )
elif self.has_oauth_config: elif self.has_oauth_config:
welcome_text.append( welcome_text.append(
@ -140,16 +147,19 @@ class WelcomeScreen(Screen):
buttons = [] buttons = []
if self.services_running: # Check if all services (native + container) are running
# Services running - show app link first, then stop services all_services_running = self.services_running and self.docling_running
if all_services_running:
# All services running - show app link first, then stop all
buttons.append( buttons.append(
Button("Launch OpenRAG", variant="success", id="open-app-btn") Button("Launch OpenRAG", variant="success", id="open-app-btn")
) )
buttons.append( buttons.append(
Button("Stop Container Services", variant="error", id="stop-services-btn") Button("Stop All Services", variant="error", id="stop-all-services-btn")
) )
else: else:
# Services not running - show setup options and start services # Some or no services running - show setup options and start all
if has_oauth: if has_oauth:
# If OAuth is configured, only show advanced setup # If OAuth is configured, only show advanced setup
buttons.append( buttons.append(
@ -165,25 +175,7 @@ class WelcomeScreen(Screen):
) )
buttons.append( buttons.append(
Button("Start Container Services", variant="primary", id="start-services-btn") Button("Start All Services", variant="primary", id="start-all-services-btn")
)
# Native services controls
if self.docling_running:
buttons.append(
Button(
"Stop Native Services",
variant="warning",
id="stop-native-services-btn",
)
)
else:
buttons.append(
Button(
"Start Native Services",
variant="primary",
id="start-native-services-btn",
)
) )
# Always show status option # Always show status option
@ -213,7 +205,7 @@ class WelcomeScreen(Screen):
) )
# Set default button focus # Set default button focus
if self.services_running: if self.services_running and self.docling_running:
self.default_button_id = "open-app-btn" self.default_button_id = "open-app-btn"
elif self.has_oauth_config: elif self.has_oauth_config:
self.default_button_id = "advanced-setup-btn" self.default_button_id = "advanced-setup-btn"
@ -234,7 +226,7 @@ class WelcomeScreen(Screen):
def _focus_appropriate_button(self) -> None: def _focus_appropriate_button(self) -> None:
"""Focus the appropriate button based on current state.""" """Focus the appropriate button based on current state."""
try: try:
if self.services_running: if self.services_running and self.docling_running:
self.query_one("#open-app-btn").focus() self.query_one("#open-app-btn").focus()
elif self.has_oauth_config: elif self.has_oauth_config:
self.query_one("#advanced-setup-btn").focus() self.query_one("#advanced-setup-btn").focus()
@ -253,20 +245,16 @@ class WelcomeScreen(Screen):
self.action_monitor() self.action_monitor()
elif event.button.id == "diagnostics-btn": elif event.button.id == "diagnostics-btn":
self.action_diagnostics() self.action_diagnostics()
elif event.button.id == "start-services-btn": elif event.button.id == "start-all-services-btn":
self.action_start_stop_services() self.action_start_all_services()
elif event.button.id == "stop-services-btn": elif event.button.id == "stop-all-services-btn":
self.action_start_stop_services() self.action_stop_all_services()
elif event.button.id == "start-native-services-btn":
self.action_start_native_services()
elif event.button.id == "stop-native-services-btn":
self.action_stop_native_services()
elif event.button.id == "open-app-btn": elif event.button.id == "open-app-btn":
self.action_open_app() self.action_open_app()
def action_default_action(self) -> None: def action_default_action(self) -> None:
"""Handle Enter key - go to default action based on state.""" """Handle Enter key - go to default action based on state."""
if self.services_running: if self.services_running and self.docling_running:
self.action_open_app() self.action_open_app()
elif self.has_oauth_config: elif self.has_oauth_config:
self.action_full_setup() self.action_full_setup()
@ -297,28 +285,13 @@ class WelcomeScreen(Screen):
self.app.push_screen(DiagnosticsScreen()) self.app.push_screen(DiagnosticsScreen())
def action_start_stop_services(self) -> None: def action_start_all_services(self) -> None:
"""Start or stop all services (containers + docling).""" """Start all services (native first, then containers)."""
if self.services_running: self.run_worker(self._start_all_services())
# Stop services - show modal with progress
if self.container_manager.is_available(): def action_stop_all_services(self) -> None:
command_generator = self.container_manager.stop_services() """Stop all services (containers first, then native)."""
modal = CommandOutputModal( self.run_worker(self._stop_all_services())
"Stopping Services",
command_generator,
on_complete=self._on_services_operation_complete,
)
self.app.push_screen(modal)
else:
# Start services - show modal with progress
if self.container_manager.is_available():
command_generator = self.container_manager.start_services()
modal = CommandOutputModal(
"Starting Services",
command_generator,
on_complete=self._on_services_operation_complete,
)
self.app.push_screen(modal)
async def _on_services_operation_complete(self) -> None: async def _on_services_operation_complete(self) -> None:
"""Handle completion of services start/stop operation.""" """Handle completion of services start/stop operation."""
@ -334,7 +307,7 @@ class WelcomeScreen(Screen):
def _update_default_button(self) -> None: def _update_default_button(self) -> None:
"""Update the default button target based on state.""" """Update the default button target based on state."""
if self.services_running: if self.services_running and self.docling_running:
self.default_button_id = "open-app-btn" self.default_button_id = "open-app-btn"
elif self.has_oauth_config: elif self.has_oauth_config:
self.default_button_id = "advanced-setup-btn" self.default_button_id = "advanced-setup-btn"
@ -362,51 +335,84 @@ class WelcomeScreen(Screen):
self.call_after_refresh(self._focus_appropriate_button) self.call_after_refresh(self._focus_appropriate_button)
def action_start_native_services(self) -> None: async def _start_all_services(self) -> None:
"""Start native services (docling).""" """Start all services: containers first, then native services."""
if self.docling_running: # Step 1: Start container services first (to create the network)
self.notify("Native services are already running.", severity="warning") if self.container_manager.is_available():
return command_generator = self.container_manager.start_services()
modal = CommandOutputModal(
"Starting Container Services",
command_generator,
on_complete=self._on_containers_started_start_native,
)
self.app.push_screen(modal)
else:
self.notify("No container runtime available", severity="warning")
# Still try to start native services
await self._start_native_services_after_containers()
self.run_worker(self._start_native_services()) async def _on_containers_started_start_native(self) -> None:
"""Called after containers start successfully, now start native services."""
# Update container state
self._detect_services_sync()
async def _start_native_services(self) -> None: # Now start native services (docling-serve can now detect the compose network)
"""Worker task to start native services.""" await self._start_native_services_after_containers()
try:
async def _start_native_services_after_containers(self) -> None:
"""Start native services after containers have been started."""
if not self.docling_manager.is_running():
self.notify("Starting native services...", severity="information")
success, message = await self.docling_manager.start() success, message = await self.docling_manager.start()
if success: if success:
self.docling_running = True
self.notify(message, severity="information") self.notify(message, severity="information")
else: else:
self.notify(f"Failed to start native services: {message}", severity="error") self.notify(f"Failed to start native services: {message}", severity="error")
except Exception as exc: else:
self.notify(f"Error starting native services: {exc}", severity="error") self.notify("Native services already running", severity="information")
finally:
self.docling_running = self.docling_manager.is_running()
await self._refresh_welcome_content()
def action_stop_native_services(self) -> None: # Update state
"""Stop native services (docling).""" self.docling_running = self.docling_manager.is_running()
if not self.docling_running and not self.docling_manager.is_running(): await self._refresh_welcome_content()
self.notify("Native services are not running.", severity="warning")
return
self.run_worker(self._stop_native_services()) async def _stop_all_services(self) -> None:
"""Stop all services: containers first, then native."""
# Step 1: Stop container services
if self.container_manager.is_available() and self.services_running:
command_generator = self.container_manager.stop_services()
modal = CommandOutputModal(
"Stopping Container Services",
command_generator,
on_complete=self._on_stop_containers_complete,
)
self.app.push_screen(modal)
else:
# No containers to stop, go directly to stopping native services
await self._stop_native_services_after_containers()
async def _stop_native_services(self) -> None: async def _on_stop_containers_complete(self) -> None:
"""Worker task to stop native services.""" """Called after containers are stopped, now stop native services."""
try: # Update container state
self._detect_services_sync()
# Now stop native services
await self._stop_native_services_after_containers()
async def _stop_native_services_after_containers(self) -> None:
"""Stop native services after containers have been stopped."""
if self.docling_manager.is_running():
self.notify("Stopping native services...", severity="information")
success, message = await self.docling_manager.stop() success, message = await self.docling_manager.stop()
if success: if success:
self.docling_running = False
self.notify(message, severity="information") self.notify(message, severity="information")
else: else:
self.notify(f"Failed to stop native services: {message}", severity="error") self.notify(f"Failed to stop native services: {message}", severity="error")
except Exception as exc: else:
self.notify(f"Error stopping native services: {exc}", severity="error") self.notify("Native services already stopped", severity="information")
finally:
self.docling_running = self.docling_manager.is_running() # Update state
await self._refresh_welcome_content() self.docling_running = self.docling_manager.is_running()
await self._refresh_welcome_content()
def action_open_app(self) -> None: def action_open_app(self) -> None:
"""Open the OpenRAG app in the default browser.""" """Open the OpenRAG app in the default browser."""

View file

@ -0,0 +1,55 @@
"""
Utility functions for constructing OpenSearch queries consistently.
"""
from typing import Union, List
def build_filename_query(filename: str) -> dict:
"""
Build a standardized query for finding documents by filename.
Args:
filename: The exact filename to search for
Returns:
A dict containing the OpenSearch query body
"""
return {
"term": {
"filename": filename
}
}
def build_filename_search_body(filename: str, size: int = 1, source: Union[bool, List[str]] = False) -> dict:
"""
Build a complete search body for checking if a filename exists.
Args:
filename: The exact filename to search for
size: Number of results to return (default: 1)
source: Whether to include source fields, or list of specific fields to include (default: False)
Returns:
A dict containing the complete OpenSearch search body
"""
return {
"query": build_filename_query(filename),
"size": size,
"_source": source
}
def build_filename_delete_body(filename: str) -> dict:
"""
Build a delete-by-query body for removing all documents with a filename.
Args:
filename: The exact filename to delete
Returns:
A dict containing the OpenSearch delete-by-query body
"""
return {
"query": build_filename_query(filename)
}