feat: add knowledge status (#53)

* feat: add status handling and visual indicators for file statuses

* refactor: comment out status field and related rendering logic in SearchPage

* format

* add timeout to delete-document mutation

* make file fields optional

* fetch task files and display them on knowledge page

* associate task IDs with files inside the task context

* added failed state to the status badge

* added files to the get-all-tasks response on the backend

* changed model endpoints to fall back to stored settings when parameters are not provided

* changed settings page to fetch models in no-auth mode

* fixed OpenAI model query running even when no API key is present

* removed unused console log

---------

Co-authored-by: Lucas Oliveira <lucas.edu.oli@hotmail.com>
Co-authored-by: Mike Fortman <michael.fortman@datastax.com>
Deon Sanchez 2025-09-24 07:27:59 -06:00 committed by GitHub
parent 8ee0438d16
commit be8e13a173
10 changed files with 529 additions and 110 deletions

View file

@@ -14,7 +14,7 @@ interface DeleteDocumentResponse {
}
const deleteDocument = async (
data: DeleteDocumentRequest
data: DeleteDocumentRequest,
): Promise<DeleteDocumentResponse> => {
const response = await fetch("/api/documents/delete-by-filename", {
method: "POST",
@@ -37,9 +37,11 @@ export const useDeleteDocument = () => {
return useMutation({
mutationFn: deleteDocument,
onSuccess: () => {
onSettled: () => {
// Invalidate and refetch search queries to update the UI
queryClient.invalidateQueries({ queryKey: ["search"] });
setTimeout(() => {
queryClient.invalidateQueries({ queryKey: ["search"] });
}, 1000);
},
});
};
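
Moving the invalidation from onSuccess to onSettled, wrapped in a one-second setTimeout, gives the backend a moment to finish the delete before the "search" query refetches. A minimal consumer sketch, assuming React Query v5 and the hook as committed (the component below is illustrative, not part of this change):

import { useDeleteDocument } from "../api/mutations/useDeleteDocument";

// Illustrative consumer: the hook already schedules the delayed
// ["search"] invalidation in onSettled, so the component only mutates.
export function DeleteDocumentButton({ filename }: { filename: string }) {
  const deleteDocument = useDeleteDocument();
  return (
    <button
      type="button"
      disabled={deleteDocument.isPending}
      onClick={() => deleteDocument.mutate({ filename })}
    >
      Delete
    </button>
  );
}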

View file

@@ -54,7 +54,7 @@ export const useGetOpenAIModelsQuery = (
queryKey: ["models", "openai", params],
queryFn: getOpenAIModels,
retry: 2,
enabled: options?.enabled !== false, // Allow enabling/disabling from options
enabled: !!params?.apiKey,
staleTime: 0, // Always fetch fresh data
gcTime: 0, // Don't cache results
...options,
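
The change above gates the query on the key itself: enabled stays false until params?.apiKey is truthy, while ...options still lets callers override it (the settings page does exactly that). A self-contained sketch of the pattern, assuming React Query v5; the route and fetcher body are stand-ins for the ones in this file:

import { useQuery } from "@tanstack/react-query";

interface OpenAIModelsParams {
  apiKey?: string;
}

// Stand-in fetcher; the committed one lives in this file.
const getOpenAIModels = async (params?: OpenAIModelsParams) => {
  const qs = params?.apiKey
    ? `?api_key=${encodeURIComponent(params.apiKey)}`
    : "";
  const response = await fetch(`/api/models/openai${qs}`); // assumed route
  if (!response.ok) throw new Error("Failed to fetch OpenAI models");
  return response.json();
};

export const useGetOpenAIModelsQuery = (
  params?: OpenAIModelsParams,
  options?: { enabled?: boolean },
) =>
  useQuery({
    queryKey: ["models", "openai", params],
    queryFn: () => getOpenAIModels(params),
    retry: 2,
    enabled: !!params?.apiKey, // idle until a key is present
    staleTime: 0, // always fetch fresh data
    gcTime: 0, // don't cache results
    ...options, // callers may still override enabled
  });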

View file

@@ -34,21 +34,28 @@ export interface ChunkResult {
export interface File {
filename: string;
mimetype: string;
chunkCount: number;
avgScore: number;
chunkCount?: number;
avgScore?: number;
source_url: string;
owner: string;
owner_name: string;
owner_email: string;
owner?: string;
owner_name?: string;
owner_email?: string;
size: number;
connector_type: string;
chunks: ChunkResult[];
status?:
| "processing"
| "active"
| "unavailable"
| "failed"
| "hidden"
| "sync";
chunks?: ChunkResult[];
}
export const useGetSearchQuery = (
query: string,
queryData?: ParsedQueryData | null,
options?: Omit<UseQueryOptions, "queryKey" | "queryFn">
options?: Omit<UseQueryOptions, "queryKey" | "queryFn">,
) => {
const queryClient = useQueryClient();
@@ -149,7 +156,7 @@ export const useGetSearchQuery = (
}
});
const files: File[] = Array.from(fileMap.values()).map(file => ({
const files: File[] = Array.from(fileMap.values()).map((file) => ({
filename: file.filename,
mimetype: file.mimetype,
chunkCount: file.chunks.length,
@@ -173,11 +180,11 @@
const queryResult = useQuery(
{
queryKey: ["search", effectiveQuery],
placeholderData: prev => prev,
placeholderData: (prev) => prev,
queryFn: getFiles,
...options,
},
queryClient
queryClient,
);
return queryResult;
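
With most File fields now optional, rows built from in-flight tasks can arrive without chunk data, scores, or owner info, so consumers must supply defaults. A small hedged helper mirroring the inline defaults the grid applies ("active" for a missing status, empty chunk data); the helper itself is hypothetical, not part of this commit:

import type { File } from "./useGetSearchQuery";

// Hypothetical convenience wrapper around the optional fields.
export function withDisplayDefaults(file: File) {
  return {
    ...file,
    status: file.status ?? "active",
    chunkCount: file.chunkCount ?? 0,
    chunks: file.chunks ?? [],
  };
}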

View file

@@ -1,16 +1,10 @@
"use client";
import {
Building2,
Cloud,
HardDrive,
Search,
Trash2,
X,
} from "lucide-react";
import { AgGridReact, CustomCellRendererProps } from "ag-grid-react";
import { useCallback, useState, useRef, ChangeEvent } from "react";
import type { ColDef } from "ag-grid-community";
import { AgGridReact, type CustomCellRendererProps } from "ag-grid-react";
import { Building2, Cloud, HardDrive, Search, Trash2, X } from "lucide-react";
import { useRouter } from "next/navigation";
import { type ChangeEvent, useCallback, useRef, useState } from "react";
import { SiGoogledrive } from "react-icons/si";
import { TbBrandOnedrive } from "react-icons/tb";
import { KnowledgeDropdown } from "@/components/knowledge-dropdown";
@@ -19,13 +13,13 @@ import { Button } from "@/components/ui/button";
import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
import { useTask } from "@/contexts/task-context";
import { type File, useGetSearchQuery } from "../api/queries/useGetSearchQuery";
import { ColDef } from "ag-grid-community";
import "@/components/AgGrid/registerAgGridModules";
import "@/components/AgGrid/agGridStyles.css";
import { toast } from "sonner";
import { KnowledgeActionsDropdown } from "@/components/knowledge-actions-dropdown";
import { StatusBadge } from "@/components/ui/status-badge";
import { DeleteConfirmationDialog } from "../../../components/confirmation-dialog";
import { useDeleteDocument } from "../api/mutations/useDeleteDocument";
import { toast } from "sonner";
// Function to get the appropriate icon for a connector type
function getSourceIcon(connectorType?: string) {
@@ -51,7 +45,7 @@ function getSourceIcon(connectorType?: string) {
function SearchPage() {
const router = useRouter();
const { isMenuOpen } = useTask();
const { isMenuOpen, files: taskFiles } = useTask();
const { selectedFilter, setSelectedFilter, parsedFilterData, isPanelOpen } =
useKnowledgeFilter();
const [selectedRows, setSelectedRows] = useState<File[]>([]);
@@ -61,14 +55,38 @@ function SearchPage() {
const { data = [], isFetching } = useGetSearchQuery(
parsedFilterData?.query || "*",
parsedFilterData
parsedFilterData,
);
const handleTableSearch = (e: ChangeEvent<HTMLInputElement>) => {
gridRef.current?.api.setGridOption("quickFilterText", e.target.value);
};
const fileResults = data as File[];
// Convert TaskFiles to File format and merge with backend results
const taskFilesAsFiles: File[] = taskFiles.map((taskFile) => {
return {
filename: taskFile.filename,
mimetype: taskFile.mimetype,
source_url: taskFile.source_url,
size: taskFile.size,
connector_type: taskFile.connector_type,
status: taskFile.status,
};
});
const backendFiles = data as File[];
const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => {
return (
taskFile.status !== "active" &&
!backendFiles.some(
(backendFile) => backendFile.filename === taskFile.filename,
)
);
});
// Combine task files first, then backend files
const fileResults = [...backendFiles, ...filteredTaskFiles];
const gridRef = useRef<AgGridReact>(null);
@@ -82,13 +100,14 @@ function SearchPage() {
minWidth: 220,
cellRenderer: ({ data, value }: CustomCellRendererProps<File>) => {
return (
<div
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors"
<button
type="button"
className="flex items-center gap-2 cursor-pointer hover:text-blue-600 transition-colors text-left w-full"
onClick={() => {
router.push(
`/knowledge/chunks?filename=${encodeURIComponent(
data?.filename ?? ""
)}`
data?.filename ?? "",
)}`,
);
}}
>
@@ -96,7 +115,7 @@ function SearchPage() {
<span className="font-medium text-foreground truncate">
{value}
</span>
</div>
</button>
);
},
},
@@ -119,6 +138,7 @@ function SearchPage() {
{
field: "chunkCount",
headerName: "Chunks",
valueFormatter: (params) => params.data?.chunkCount?.toString() || "-",
},
{
field: "avgScore",
@@ -127,11 +147,20 @@ function SearchPage() {
cellRenderer: ({ value }: CustomCellRendererProps<File>) => {
return (
<span className="text-xs text-green-400 bg-green-400/20 px-2 py-1 rounded">
{value.toFixed(2)}
{value?.toFixed(2) ?? "-"}
</span>
);
},
},
{
field: "status",
headerName: "Status",
cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
// Default to 'active' status if no status is provided
const status = data?.status || "active";
return <StatusBadge status={status} />;
},
},
{
cellRenderer: ({ data }: CustomCellRendererProps<File>) => {
return <KnowledgeActionsDropdown filename={data?.filename || ""} />;
@@ -172,7 +201,7 @@ function SearchPage() {
try {
// Delete each file individually since the API expects one filename at a time
const deletePromises = selectedRows.map((row) =>
deleteDocumentMutation.mutateAsync({ filename: row.filename })
deleteDocumentMutation.mutateAsync({ filename: row.filename }),
);
await Promise.all(deletePromises);
@@ -180,7 +209,7 @@ function SearchPage() {
toast.success(
`Successfully deleted ${selectedRows.length} document${
selectedRows.length > 1 ? "s" : ""
}`
}`,
);
setSelectedRows([]);
setShowBulkDeleteDialog(false);
@@ -193,7 +222,7 @@ function SearchPage() {
toast.error(
error instanceof Error
? error.message
: "Failed to delete some documents"
: "Failed to delete some documents",
);
}
};
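
The merge performed inline above is a pure transformation: convert task files to the File shape, drop any that are already active or already reported by the backend, then append the rest after the backend rows. A sketch of it as a standalone function (an illustrative extraction, not code from this commit):

import type { TaskFile } from "@/contexts/task-context";
import type { File } from "../api/queries/useGetSearchQuery";

// Backend rows win; task files are appended only while still in flight.
export function mergeKnowledgeFiles(
  backendFiles: File[],
  taskFiles: TaskFile[],
): File[] {
  const known = new Set(backendFiles.map((f) => f.filename));
  const pending = taskFiles
    .filter((t) => t.status !== "active" && !known.has(t.filename))
    .map((t) => ({
      filename: t.filename,
      mimetype: t.mimetype,
      source_url: t.source_url,
      size: t.size,
      connector_type: t.connector_type,
      status: t.status,
    }));
  return [...backendFiles, ...pending];
}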

View file

@@ -4,11 +4,13 @@ import { Loader2, PlugZap, RefreshCw } from "lucide-react";
import { useSearchParams } from "next/navigation";
import { Suspense, useCallback, useEffect, useState } from "react";
import { useUpdateFlowSettingMutation } from "@/app/api/mutations/useUpdateFlowSettingMutation";
import {
useGetIBMModelsQuery,
useGetOllamaModelsQuery,
useGetOpenAIModelsQuery,
} from "@/app/api/queries/useGetModelsQuery";
import { useGetSettingsQuery } from "@/app/api/queries/useGetSettingsQuery";
import { useGetOpenAIModelsQuery, useGetOllamaModelsQuery, useGetIBMModelsQuery } from "@/app/api/queries/useGetModelsQuery";
import { ConfirmationDialog } from "@/components/confirmation-dialog";
import { ModelSelectItems } from "./helpers/model-select-item";
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
import { ProtectedRoute } from "@/components/protected-route";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
@@ -33,6 +35,8 @@ import { Textarea } from "@/components/ui/textarea";
import { useAuth } from "@/contexts/auth-context";
import { useTask } from "@/contexts/task-context";
import { useDebounce } from "@/lib/debounce";
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
import { ModelSelectItems } from "./helpers/model-select-item";
const MAX_SYSTEM_PROMPT_CHARS = 2000;
@@ -105,42 +109,46 @@ function KnowledgeSourcesPage() {
// Fetch settings using React Query
const { data: settings = {} } = useGetSettingsQuery({
enabled: isAuthenticated,
enabled: isAuthenticated || isNoAuthMode,
});
// Get the current provider from settings
const currentProvider = (settings.provider?.model_provider || 'openai') as ModelProvider;
const currentProvider = (settings.provider?.model_provider ||
"openai") as ModelProvider;
// Fetch available models based on provider
const { data: openaiModelsData } = useGetOpenAIModelsQuery(
undefined, // Let backend use stored API key from configuration
{
enabled: isAuthenticated && currentProvider === 'openai',
}
enabled:
(isAuthenticated || isNoAuthMode) && currentProvider === "openai",
},
);
const { data: ollamaModelsData } = useGetOllamaModelsQuery(
undefined, // No params for now, could be extended later
{
enabled: isAuthenticated && currentProvider === 'ollama',
}
enabled:
(isAuthenticated || isNoAuthMode) && currentProvider === "ollama",
},
);
const { data: ibmModelsData } = useGetIBMModelsQuery(
undefined, // No params for now, could be extended later
{
enabled: isAuthenticated && currentProvider === 'ibm',
}
enabled: (isAuthenticated || isNoAuthMode) && currentProvider === "ibm",
},
);
// Select the appropriate models data based on provider
const modelsData = currentProvider === 'openai'
? openaiModelsData
: currentProvider === 'ollama'
? ollamaModelsData
: currentProvider === 'ibm'
? ibmModelsData
: openaiModelsData; // fallback to openai
const modelsData =
currentProvider === "openai"
? openaiModelsData
: currentProvider === "ollama"
? ollamaModelsData
: currentProvider === "ibm"
? ibmModelsData
: openaiModelsData; // fallback to openai
// Mutations
const updateFlowSettingMutation = useUpdateFlowSettingMutation({
@@ -152,7 +160,6 @@ function KnowledgeSourcesPage() {
},
});
// Debounced update function
const debouncedUpdate = useDebounce(
(variables: Parameters<typeof updateFlowSettingMutation.mutate>[0]) => {
@@ -224,7 +231,6 @@ function KnowledgeSourcesPage() {
debouncedUpdate({ doclingPresets: mode });
};
// Helper function to get connector icon
const getConnectorIcon = useCallback((iconName: string) => {
const iconMap: { [key: string]: React.ReactElement } = {
@@ -613,7 +619,11 @@ function KnowledgeSourcesPage() {
Language Model
</Label>
<Select
value={settings.agent?.llm_model || modelsData?.language_models?.find(m => m.default)?.value || "gpt-4"}
value={
settings.agent?.llm_model ||
modelsData?.language_models?.find((m) => m.default)?.value ||
"gpt-4"
}
onValueChange={handleModelChange}
>
<SelectTrigger id="model-select">
@@ -638,10 +648,20 @@ function KnowledgeSourcesPage() {
value={systemPrompt}
onChange={(e) => setSystemPrompt(e.target.value)}
rows={6}
className={`resize-none ${systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS ? 'border-red-500 focus:border-red-500' : ''}`}
className={`resize-none ${
systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
? "border-red-500 focus:border-red-500"
: ""
}`}
/>
<div className="flex justify-start">
<span className={`text-xs ${systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS ? 'text-red-500' : 'text-muted-foreground'}`}>
<span
className={`text-xs ${
systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
? "text-red-500"
: "text-muted-foreground"
}`}
>
{systemPrompt.length}/{MAX_SYSTEM_PROMPT_CHARS} characters
</span>
</div>
@@ -649,7 +669,10 @@ function KnowledgeSourcesPage() {
<div className="flex justify-end pt-2">
<Button
onClick={handleSystemPromptSave}
disabled={updateFlowSettingMutation.isPending || systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS}
disabled={
updateFlowSettingMutation.isPending ||
systemPrompt.length > MAX_SYSTEM_PROMPT_CHARS
}
className="min-w-[120px]"
size="sm"
variant="outline"
@@ -736,7 +759,9 @@ function KnowledgeSourcesPage() {
</Label>
<Select
value={
settings.knowledge?.embedding_model || modelsData?.embedding_models?.find(m => m.default)?.value || "text-embedding-ada-002"
settings.knowledge?.embedding_model ||
modelsData?.embedding_models?.find((m) => m.default)?.value ||
"text-embedding-ada-002"
}
onValueChange={handleEmbeddingModelChange}
>
@@ -746,7 +771,9 @@ function KnowledgeSourcesPage() {
<SelectContent>
<ModelSelectItems
models={modelsData?.embedding_models}
fallbackModels={getFallbackModels(currentProvider).embedding}
fallbackModels={
getFallbackModels(currentProvider).embedding
}
provider={currentProvider}
/>
</SelectContent>
@@ -807,7 +834,10 @@ function KnowledgeSourcesPage() {
<div className="flex items-center space-x-3">
<RadioGroupItem value="standard" id="standard" />
<div className="flex-1">
<Label htmlFor="standard" className="text-base font-medium cursor-pointer">
<Label
htmlFor="standard"
className="text-base font-medium cursor-pointer"
>
Standard
</Label>
<div className="text-sm text-muted-foreground">
@@ -818,18 +848,28 @@ function KnowledgeSourcesPage() {
<div className="flex items-center space-x-3">
<RadioGroupItem value="ocr" id="ocr" />
<div className="flex-1">
<Label htmlFor="ocr" className="text-base font-medium cursor-pointer">
<Label
htmlFor="ocr"
className="text-base font-medium cursor-pointer"
>
Extract text from images
</Label>
<div className="text-sm text-muted-foreground">
Uses OCR to extract text from images/PDFs. Ingest is slower when enabled
Uses OCR to extract text from images/PDFs. Ingest is
slower when enabled
</div>
</div>
</div>
<div className="flex items-center space-x-3">
<RadioGroupItem value="picture_description" id="picture_description" />
<RadioGroupItem
value="picture_description"
id="picture_description"
/>
<div className="flex-1">
<Label htmlFor="picture_description" className="text-base font-medium cursor-pointer">
<Label
htmlFor="picture_description"
className="text-base font-medium cursor-pointer"
>
Generate Description
</Label>
<div className="text-sm text-muted-foreground">
@@ -840,11 +880,15 @@ function KnowledgeSourcesPage() {
<div className="flex items-center space-x-3">
<RadioGroupItem value="VLM" id="VLM" />
<div className="flex-1">
<Label htmlFor="VLM" className="text-base font-medium cursor-pointer">
<Label
htmlFor="VLM"
className="text-base font-medium cursor-pointer"
>
AI Vision
</Label>
<div className="text-sm text-muted-foreground">
Advanced processing with vision language models. Highest quality but most expensive
Advanced processing with vision language models. Highest
quality but most expensive
</div>
</div>
</div>
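
Two changes recur through this file: every models query is now enabled under isAuthenticated || isNoAuthMode, and a nested ternary picks the models payload for the active provider, defaulting to OpenAI. The selection could equally be a lookup table; a hedged sketch with simplified types, offered only as an alternative reading:

type ModelProvider = "openai" | "ollama" | "ibm";
type ModelsData = {
  language_models?: { value: string; default?: boolean }[];
  embedding_models?: { value: string; default?: boolean }[];
};

// Illustrative equivalent of the nested ternary above; the per-provider
// values would come from the three queries in this page.
export function pickModelsData(
  provider: ModelProvider,
  data: Partial<Record<ModelProvider, ModelsData>>,
): ModelsData | undefined {
  return data[provider] ?? data.openai; // fallback to openai
}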

View file

@@ -0,0 +1,49 @@
interface AnimatedProcessingIconProps {
className?: string;
size?: number;
}
export const AnimatedProcessingIcon = ({
className = "",
size = 10,
}: AnimatedProcessingIconProps) => {
const width = Math.round((size * 6) / 10);
const height = size;
return (
<svg
width={width}
height={height}
viewBox="0 0 6 10"
fill="none"
xmlns="http://www.w3.org/2000/svg"
className={className}
>
<style>
{`
.dot-1 { animation: pulse-wave 1.5s infinite; animation-delay: 0s; }
.dot-2 { animation: pulse-wave 1.5s infinite; animation-delay: 0.1s; }
.dot-3 { animation: pulse-wave 1.5s infinite; animation-delay: 0.2s; }
.dot-4 { animation: pulse-wave 1.5s infinite; animation-delay: 0.3s; }
.dot-5 { animation: pulse-wave 1.5s infinite; animation-delay: 0.4s; }
@keyframes pulse-wave {
0%, 60%, 100% {
opacity: 0.25;
transform: scale(1);
}
30% {
opacity: 1;
transform: scale(1.2);
}
}
`}
</style>
<circle className="dot-1" cx="1" cy="5" r="1" fill="currentColor" />
<circle className="dot-2" cx="1" cy="9" r="1" fill="currentColor" />
<circle className="dot-3" cx="5" cy="1" r="1" fill="currentColor" />
<circle className="dot-4" cx="5" cy="5" r="1" fill="currentColor" />
<circle className="dot-5" cx="5" cy="9" r="1" fill="currentColor" />
</svg>
);
};

View file

@@ -0,0 +1,58 @@
import { AnimatedProcessingIcon } from "./animated-processing-icon";
export type Status =
| "processing"
| "active"
| "unavailable"
| "hidden"
| "sync"
| "failed";
interface StatusBadgeProps {
status: Status;
className?: string;
}
const statusConfig = {
processing: {
label: "Processing",
className: "text-muted-foreground dark:text-muted-foreground ",
},
active: {
label: "Active",
className: "text-emerald-600 dark:text-emerald-400 ",
},
unavailable: {
label: "Unavailable",
className: "text-red-600 dark:text-red-400 ",
},
failed: {
label: "Failed",
className: "text-red-600 dark:text-red-400 ",
},
hidden: {
label: "Hidden",
className: "text-zinc-400 dark:text-zinc-500 ",
},
sync: {
label: "Sync",
className: "text-amber-700 dark:text-amber-300 underline",
},
};
export const StatusBadge = ({ status, className }: StatusBadgeProps) => {
const config = statusConfig[status];
return (
<div
className={`inline-flex items-center gap-1 ${config.className} ${
className || ""
}`}
>
{status === "processing" && (
<AnimatedProcessingIcon className="text-current mr-2" size={10} />
)}
{config.label}
</div>
);
};
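
Because statusConfig is indexed by the Status union, a satisfies annotation would make the compiler reject any status added to the union but missing from the config. A hedged sketch of that tightening (not part of the commit; Status restates the union exported above):

type Status =
  | "processing"
  | "active"
  | "unavailable"
  | "hidden"
  | "sync"
  | "failed";

// Hypothetical: `satisfies` keeps the config's keys in lockstep with
// the Status union without widening the per-entry value types.
const statusConfig = {
  processing: { label: "Processing", className: "text-muted-foreground" },
  active: { label: "Active", className: "text-emerald-600" },
  unavailable: { label: "Unavailable", className: "text-red-600" },
  failed: { label: "Failed", className: "text-red-600" },
  hidden: { label: "Hidden", className: "text-zinc-400" },
  sync: { label: "Sync", className: "text-amber-700 underline" },
} satisfies Record<Status, { label: string; className: string }>;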

View file

@@ -35,9 +35,22 @@ export interface Task {
files?: Record<string, Record<string, unknown>>;
}
export interface TaskFile {
filename: string;
mimetype: string;
source_url: string;
size: number;
connector_type: string;
status: "active" | "failed" | "processing";
task_id: string;
created_at: string;
updated_at: string;
}
interface TaskContextType {
tasks: Task[];
files: TaskFile[];
addTask: (taskId: string) => void;
addFiles: (files: Partial<TaskFile>[], taskId: string) => void;
removeTask: (taskId: string) => void;
refreshTasks: () => Promise<void>;
cancelTask: (taskId: string) => Promise<void>;
@@ -51,6 +64,7 @@ const TaskContext = createContext<TaskContextType | undefined>(undefined);
export function TaskProvider({ children }: { children: React.ReactNode }) {
const [tasks, setTasks] = useState<Task[]>([]);
const [files, setFiles] = useState<TaskFile[]>([]);
const [isPolling, setIsPolling] = useState(false);
const [isFetching, setIsFetching] = useState(false);
const [isMenuOpen, setIsMenuOpen] = useState(false);
@@ -58,12 +72,32 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const queryClient = useQueryClient();
const refetchSearch = () => {
const refetchSearch = useCallback(() => {
queryClient.invalidateQueries({
queryKey: ["search"],
exact: false,
});
};
}, [queryClient]);
const addFiles = useCallback(
(newFiles: Partial<TaskFile>[], taskId: string) => {
const now = new Date().toISOString();
const filesToAdd: TaskFile[] = newFiles.map((file) => ({
filename: file.filename || "",
mimetype: file.mimetype || "",
source_url: file.source_url || "",
size: file.size || 0,
connector_type: file.connector_type || "local",
status: "processing",
task_id: taskId,
created_at: now,
updated_at: now,
}));
setFiles((prevFiles) => [...prevFiles, ...filesToAdd]);
},
[],
);
const fetchTasks = useCallback(async () => {
if (!isAuthenticated && !isNoAuthMode) return;
@@ -76,13 +110,87 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const newTasks = data.tasks || [];
// Update tasks and check for status changes in the same state update
setTasks(prevTasks => {
setTasks((prevTasks) => {
// Check for newly completed tasks to show toasts
if (prevTasks.length > 0) {
newTasks.forEach((newTask: Task) => {
const oldTask = prevTasks.find(
t => t.task_id === newTask.task_id
(t) => t.task_id === newTask.task_id,
);
// Update or add files from task.files if available
if (newTask.files && typeof newTask.files === "object") {
const taskFileEntries = Object.entries(newTask.files);
const now = new Date().toISOString();
taskFileEntries.forEach(([filePath, fileInfo]) => {
if (typeof fileInfo === "object" && fileInfo) {
const fileName = filePath.split("/").pop() || filePath;
const fileStatus = fileInfo.status as string;
// Map backend file status to our TaskFile status
let mappedStatus: TaskFile["status"];
switch (fileStatus) {
case "pending":
case "running":
mappedStatus = "processing";
break;
case "completed":
mappedStatus = "active";
break;
case "failed":
mappedStatus = "failed";
break;
default:
mappedStatus = "processing";
}
setFiles((prevFiles) => {
const existingFileIndex = prevFiles.findIndex(
(f) =>
f.source_url === filePath &&
f.task_id === newTask.task_id,
);
// Detect connector type based on file path or other indicators
let connectorType = "local";
if (filePath.includes("/") && !filePath.startsWith("/")) {
// Likely S3 key format (bucket/path/file.ext)
connectorType = "s3";
}
const fileEntry: TaskFile = {
filename: fileName,
mimetype: "", // We don't have this info from the task
source_url: filePath,
size: 0, // We don't have this info from the task
connector_type: connectorType,
status: mappedStatus,
task_id: newTask.task_id,
created_at:
typeof fileInfo.created_at === "string"
? fileInfo.created_at
: now,
updated_at:
typeof fileInfo.updated_at === "string"
? fileInfo.updated_at
: now,
};
if (existingFileIndex >= 0) {
// Update existing file
const updatedFiles = [...prevFiles];
updatedFiles[existingFileIndex] = fileEntry;
return updatedFiles;
} else {
// Add new file
return [...prevFiles, fileEntry];
}
});
}
});
}
if (
oldTask &&
oldTask.status !== "completed" &&
@@ -99,9 +207,14 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
refetchSearch();
// Dispatch knowledge updated event for all knowledge-related pages
console.log(
"Task completed successfully, dispatching knowledgeUpdated event"
"Task completed successfully, dispatching knowledgeUpdated event",
);
window.dispatchEvent(new CustomEvent("knowledgeUpdated"));
// Remove files for this completed task from the files list
setFiles((prevFiles) =>
prevFiles.filter((file) => file.task_id !== newTask.task_id),
);
} else if (
oldTask &&
oldTask.status !== "failed" &&
@@ -114,6 +227,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
newTask.error || "Unknown error"
}`,
});
// Files will be updated to failed status by the file parsing logic above
}
});
}
@@ -126,7 +241,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
} finally {
setIsFetching(false);
}
}, [isAuthenticated, isNoAuthMode]); // Removed 'tasks' from dependencies to prevent infinite loop!
}, [isAuthenticated, isNoAuthMode, refetchSearch]); // Removed 'tasks' from dependencies to prevent infinite loop!
const addTask = useCallback((taskId: string) => {
// Immediately start aggressive polling for the new task
@@ -140,19 +255,21 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const data = await response.json();
const newTasks = data.tasks || [];
const foundTask = newTasks.find(
(task: Task) => task.task_id === taskId
(task: Task) => task.task_id === taskId,
);
if (foundTask) {
// Task found! Update the tasks state
setTasks(prevTasks => {
setTasks((prevTasks) => {
// Check if task is already in the list
const exists = prevTasks.some(t => t.task_id === taskId);
const exists = prevTasks.some((t) => t.task_id === taskId);
if (!exists) {
return [...prevTasks, foundTask];
}
// Update existing task
return prevTasks.map(t => (t.task_id === taskId ? foundTask : t));
return prevTasks.map((t) =>
t.task_id === taskId ? foundTask : t,
);
});
return; // Stop polling, we found it
}
@@ -177,7 +294,7 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
}, [fetchTasks]);
const removeTask = useCallback((taskId: string) => {
setTasks(prev => prev.filter(task => task.task_id !== taskId));
setTasks((prev) => prev.filter((task) => task.task_id !== taskId));
}, []);
const cancelTask = useCallback(
@@ -204,11 +321,11 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
});
}
},
[fetchTasks]
[fetchTasks],
);
const toggleMenu = useCallback(() => {
setIsMenuOpen(prev => !prev);
setIsMenuOpen((prev) => !prev);
}, []);
// Periodic polling for task updates
@@ -231,7 +348,9 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
const value: TaskContextType = {
tasks,
files,
addTask,
addFiles,
removeTask,
refreshTasks,
cancelTask,
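
The heart of the new file tracking is the status mapping buried in the polling loop: backend "pending" and "running" collapse to "processing", "completed" becomes "active", and "failed" stays "failed". As a pure function it would read like this (illustrative extraction, not code from this commit):

type TaskFileStatus = "active" | "failed" | "processing";

// Mirrors the switch in fetchTasks: unknown backend states are
// treated as still processing.
export function mapFileStatus(backendStatus: string): TaskFileStatus {
  switch (backendStatus) {
    case "completed":
      return "active";
    case "failed":
      return "failed";
    case "pending":
    case "running":
    default:
      return "processing";
  }
}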

View file

@@ -17,14 +17,18 @@ async def get_openai_models(request, models_service, session_manager):
try:
config = get_openrag_config()
api_key = config.provider.api_key
logger.info(f"Retrieved API key from config: {'yes' if api_key else 'no'}")
logger.info(
f"Retrieved API key from config: {'yes' if api_key else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not api_key:
return JSONResponse(
{"error": "OpenAI API key is required either as query parameter or in configuration"},
status_code=400
{
"error": "OpenAI API key is required either as query parameter or in configuration"
},
status_code=400,
)
models = await models_service.get_openai_models(api_key=api_key)
@@ -32,8 +36,7 @@
except Exception as e:
logger.error(f"Failed to get OpenAI models: {str(e)}")
return JSONResponse(
{"error": f"Failed to retrieve OpenAI models: {str(e)}"},
status_code=500
{"error": f"Failed to retrieve OpenAI models: {str(e)}"}, status_code=500
)
@@ -44,13 +47,31 @@
query_params = dict(request.query_params)
endpoint = query_params.get("endpoint")
# If no endpoint provided, try to get it from stored configuration
if not endpoint:
try:
config = get_openrag_config()
endpoint = config.provider.endpoint
logger.info(
f"Retrieved endpoint from config: {'yes' if endpoint else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not endpoint:
return JSONResponse(
{
"error": "Endpoint is required either as query parameter or in configuration"
},
status_code=400,
)
models = await models_service.get_ollama_models(endpoint=endpoint)
return JSONResponse(models)
except Exception as e:
logger.error(f"Failed to get Ollama models: {str(e)}")
return JSONResponse(
{"error": f"Failed to retrieve Ollama models: {str(e)}"},
status_code=500
{"error": f"Failed to retrieve Ollama models: {str(e)}"}, status_code=500
)
@@ -63,15 +84,65 @@
api_key = query_params.get("api_key")
project_id = query_params.get("project_id")
config = get_openrag_config()
# If no API key provided, try to get it from stored configuration
if not api_key:
try:
api_key = config.provider.api_key
logger.info(
f"Retrieved API key from config: {'yes' if api_key else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not api_key:
return JSONResponse(
{
"error": "OpenAI API key is required either as query parameter or in configuration"
},
status_code=400,
)
if not endpoint:
try:
endpoint = config.provider.endpoint
logger.info(
f"Retrieved endpoint from config: {'yes' if endpoint else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not endpoint:
return JSONResponse(
{
"error": "Endpoint is required either as query parameter or in configuration"
},
status_code=400,
)
if not project_id:
try:
project_id = config.provider.project_id
logger.info(
f"Retrieved project ID from config: {'yes' if project_id else 'no'}"
)
except Exception as e:
logger.error(f"Failed to get config: {e}")
if not project_id:
return JSONResponse(
{
"error": "Project ID is required either as query parameter or in configuration"
},
status_code=400,
)
models = await models_service.get_ibm_models(
endpoint=endpoint,
api_key=api_key,
project_id=project_id
endpoint=endpoint, api_key=api_key, project_id=project_id
)
return JSONResponse(models)
except Exception as e:
logger.error(f"Failed to get IBM models: {str(e)}")
return JSONResponse(
{"error": f"Failed to retrieve IBM models: {str(e)}"},
status_code=500
)
{"error": f"Failed to retrieve IBM models: {str(e)}"}, status_code=500
)

View file

@@ -17,7 +17,9 @@ class TaskService:
def __init__(self, document_service=None, process_pool=None):
self.document_service = document_service
self.process_pool = process_pool
self.task_store: dict[str, dict[str, UploadTask]] = {} # user_id -> {task_id -> UploadTask}
self.task_store: dict[
str, dict[str, UploadTask]
] = {} # user_id -> {task_id -> UploadTask}
self.background_tasks = set()
if self.process_pool is None:
@@ -122,18 +124,27 @@ class TaskService:
# Process files with limited concurrency to avoid overwhelming the system
max_workers = get_worker_count()
semaphore = asyncio.Semaphore(max_workers * 2) # Allow 2x process pool size for async I/O
semaphore = asyncio.Semaphore(
max_workers * 2
) # Allow 2x process pool size for async I/O
async def process_with_semaphore(file_path: str):
async with semaphore:
await self.document_service.process_single_file_task(upload_task, file_path)
await self.document_service.process_single_file_task(
upload_task, file_path
)
tasks = [process_with_semaphore(file_path) for file_path in upload_task.file_tasks.keys()]
tasks = [
process_with_semaphore(file_path)
for file_path in upload_task.file_tasks.keys()
]
await asyncio.gather(*tasks, return_exceptions=True)
except Exception as e:
logger.error("Background upload processor failed", task_id=task_id, error=str(e))
logger.error(
"Background upload processor failed", task_id=task_id, error=str(e)
)
import traceback
traceback.print_exc()
@@ -141,7 +152,9 @@ class TaskService:
self.task_store[user_id][task_id].status = TaskStatus.FAILED
self.task_store[user_id][task_id].updated_at = time.time()
async def background_custom_processor(self, user_id: str, task_id: str, items: list) -> None:
async def background_custom_processor(
self, user_id: str, task_id: str, items: list
) -> None:
"""Background task to process items using custom processor"""
try:
upload_task = self.task_store[user_id][task_id]
@@ -163,7 +176,9 @@ class TaskService:
try:
await processor.process_item(upload_task, item, file_task)
except Exception as e:
logger.error("Failed to process item", item=str(item), error=str(e))
logger.error(
"Failed to process item", item=str(item), error=str(e)
)
import traceback
traceback.print_exc()
@@ -190,7 +205,9 @@ class TaskService:
pass
raise # Re-raise to properly handle cancellation
except Exception as e:
logger.error("Background custom processor failed", task_id=task_id, error=str(e))
logger.error(
"Background custom processor failed", task_id=task_id, error=str(e)
)
import traceback
traceback.print_exc()
@@ -212,7 +229,10 @@ class TaskService:
upload_task = None
for candidate_user_id in candidate_user_ids:
if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]:
if (
candidate_user_id in self.task_store
and task_id in self.task_store[candidate_user_id]
):
upload_task = self.task_store[candidate_user_id][task_id]
break
@@ -271,10 +291,23 @@ class TaskService:
if task_id in tasks_by_id:
continue
# Calculate running and pending counts
# Calculate running and pending counts and build file statuses
running_files_count = 0
pending_files_count = 0
for file_task in upload_task.file_tasks.values():
file_statuses = {}
for file_path, file_task in upload_task.file_tasks.items():
if file_task.status.value != "completed":
file_statuses[file_path] = {
"status": file_task.status.value,
"result": file_task.result,
"error": file_task.error,
"retry_count": file_task.retry_count,
"created_at": file_task.created_at,
"updated_at": file_task.updated_at,
"duration_seconds": file_task.duration_seconds,
}
if file_task.status.value == "running":
running_files_count += 1
elif file_task.status.value == "pending":
@@ -292,6 +325,7 @@ class TaskService:
"created_at": upload_task.created_at,
"updated_at": upload_task.updated_at,
"duration_seconds": upload_task.duration_seconds,
"files": file_statuses,
}
# First, add user-owned tasks; then shared anonymous;
@@ -312,7 +346,10 @@ class TaskService:
store_user_id = None
for candidate_user_id in candidate_user_ids:
if candidate_user_id in self.task_store and task_id in self.task_store[candidate_user_id]:
if (
candidate_user_id in self.task_store
and task_id in self.task_store[candidate_user_id]
):
store_user_id = candidate_user_id
break
@@ -326,7 +363,10 @@ class TaskService:
return False
# Cancel the background task to stop scheduling new work
if hasattr(upload_task, "background_task") and not upload_task.background_task.done():
if (
hasattr(upload_task, "background_task")
and not upload_task.background_task.done()
):
upload_task.background_task.cancel()
# Mark task as failed (cancelled)