From cc5711bb5e19626db41d1eb698f1ddc7c47f8afa Mon Sep 17 00:00:00 2001 From: Lucas Oliveira Date: Wed, 3 Dec 2025 17:12:06 -0300 Subject: [PATCH] make health check enable completion check if chat is failed --- .../app/api/queries/useProviderHealthQuery.ts | 235 ++++++++++-------- frontend/app/chat/page.tsx | 14 ++ .../components/provider-health-banner.tsx | 6 +- frontend/contexts/chat-context.tsx | 6 + 4 files changed, 151 insertions(+), 110 deletions(-) diff --git a/frontend/app/api/queries/useProviderHealthQuery.ts b/frontend/app/api/queries/useProviderHealthQuery.ts index d0f73335..24980467 100644 --- a/frontend/app/api/queries/useProviderHealthQuery.ts +++ b/frontend/app/api/queries/useProviderHealthQuery.ts @@ -1,136 +1,153 @@ import { - type UseQueryOptions, - useQuery, - useQueryClient, + type UseQueryOptions, + useQuery, + useQueryClient, } from "@tanstack/react-query"; +import { useChat } from "@/contexts/chat-context"; import { useGetSettingsQuery } from "./useGetSettingsQuery"; export interface ProviderHealthDetails { - llm_model: string; - embedding_model: string; - endpoint?: string | null; + llm_model: string; + embedding_model: string; + endpoint?: string | null; } export interface ProviderHealthResponse { - status: "healthy" | "unhealthy" | "error" | "backend-unavailable"; - message: string; - provider?: string; - llm_provider?: string; - embedding_provider?: string; - llm_error?: string | null; - embedding_error?: string | null; - details?: ProviderHealthDetails; + status: "healthy" | "unhealthy" | "error" | "backend-unavailable"; + message: string; + provider?: string; + llm_provider?: string; + embedding_provider?: string; + llm_error?: string | null; + embedding_error?: string | null; + details?: ProviderHealthDetails; } export interface ProviderHealthParams { - provider?: "openai" | "ollama" | "watsonx"; + provider?: "openai" | "ollama" | "watsonx"; + test_completion?: boolean; } // Track consecutive failures for exponential backoff const failureCountMap = new Map(); export const useProviderHealthQuery = ( - params?: ProviderHealthParams, - options?: Omit< - UseQueryOptions, - "queryKey" | "queryFn" - >, + params?: ProviderHealthParams, + options?: Omit< + UseQueryOptions, + "queryKey" | "queryFn" + >, ) => { - const queryClient = useQueryClient(); + const queryClient = useQueryClient(); - const { data: settings = {} } = useGetSettingsQuery(); + // Get chat error state from context (ChatProvider wraps the entire app in layout.tsx) + const { hasChatError, setChatError } = useChat(); - async function checkProviderHealth(): Promise { - try { - const url = new URL("/api/provider/health", window.location.origin); + const { data: settings = {} } = useGetSettingsQuery(); - // Add provider query param if specified - if (params?.provider) { - url.searchParams.set("provider", params.provider); - } + async function checkProviderHealth(): Promise { + try { + const url = new URL("/api/provider/health", window.location.origin); - const response = await fetch(url.toString()); + // Add provider query param if specified + if (params?.provider) { + url.searchParams.set("provider", params.provider); + } - if (response.ok) { - return await response.json(); - } else if (response.status === 503) { - // Backend is up but provider validation failed - const errorData = await response.json().catch(() => ({})); - return { - status: "unhealthy", - message: errorData.message || "Provider validation failed", - provider: errorData.provider || params?.provider || "unknown", - llm_provider: errorData.llm_provider, - embedding_provider: errorData.embedding_provider, - llm_error: errorData.llm_error, - embedding_error: errorData.embedding_error, - details: errorData.details, - }; - } else { - // Other backend errors (400, etc.) - treat as provider issues - const errorData = await response.json().catch(() => ({})); - return { - status: "error", - message: errorData.message || "Failed to check provider health", - provider: errorData.provider || params?.provider || "unknown", - llm_provider: errorData.llm_provider, - embedding_provider: errorData.embedding_provider, - llm_error: errorData.llm_error, - embedding_error: errorData.embedding_error, - details: errorData.details, - }; - } - } catch (error) { - // Network error - backend is likely down, don't show provider banner - return { - status: "backend-unavailable", - message: error instanceof Error ? error.message : "Connection failed", - provider: params?.provider || "unknown", - }; - } - } + // Add test_completion query param if specified or if chat error exists + const testCompletion = params?.test_completion ?? hasChatError; + if (testCompletion) { + url.searchParams.set("test_completion", "true"); + } - const queryKey = ["provider", "health"]; - const failureCountKey = queryKey.join("-"); + const response = await fetch(url.toString()); - const queryResult = useQuery( - { - queryKey, - queryFn: checkProviderHealth, - retry: false, // Don't retry health checks automatically - refetchInterval: (query) => { - const data = query.state.data; - const status = data?.status; - - // If healthy, reset failure count and check every 30 seconds - if (status === "healthy") { - failureCountMap.set(failureCountKey, 0); - return 30000; - } - - // If backend unavailable, use moderate polling - if (status === "backend-unavailable") { - return 15000; - } - - // For unhealthy/error status, use exponential backoff - const currentFailures = failureCountMap.get(failureCountKey) || 0; - failureCountMap.set(failureCountKey, currentFailures + 1); - - // Exponential backoff: 5s, 10s, 20s, then 30s - const backoffDelays = [5000, 10000, 20000, 30000]; - const delay = backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)]; - - return delay; - }, - refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches - refetchOnMount: true, - staleTime: 30000, // Consider data stale after 30 seconds - enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete - ...options, - }, - queryClient, - ); + if (response.ok) { + return await response.json(); + } else if (response.status === 503) { + // Backend is up but provider validation failed + const errorData = await response.json().catch(() => ({})); + return { + status: "unhealthy", + message: errorData.message || "Provider validation failed", + provider: errorData.provider || params?.provider || "unknown", + llm_provider: errorData.llm_provider, + embedding_provider: errorData.embedding_provider, + llm_error: errorData.llm_error, + embedding_error: errorData.embedding_error, + details: errorData.details, + }; + } else { + // Other backend errors (400, etc.) - treat as provider issues + const errorData = await response.json().catch(() => ({})); + return { + status: "error", + message: errorData.message || "Failed to check provider health", + provider: errorData.provider || params?.provider || "unknown", + llm_provider: errorData.llm_provider, + embedding_provider: errorData.embedding_provider, + llm_error: errorData.llm_error, + embedding_error: errorData.embedding_error, + details: errorData.details, + }; + } + } catch (error) { + // Network error - backend is likely down, don't show provider banner + return { + status: "backend-unavailable", + message: error instanceof Error ? error.message : "Connection failed", + provider: params?.provider || "unknown", + }; + } + } - return queryResult; + const queryKey = ["provider", "health", params?.test_completion]; + const failureCountKey = queryKey.join("-"); + + const queryResult = useQuery( + { + queryKey, + queryFn: checkProviderHealth, + retry: false, // Don't retry health checks automatically + refetchInterval: (query) => { + const data = query.state.data; + const status = data?.status; + + // If healthy, reset failure count and check every 30 seconds + // Also reset chat error flag if we're using test_completion=true and it succeeded + if (status === "healthy") { + failureCountMap.set(failureCountKey, 0); + // If we were checking with test_completion=true due to chat errors, reset the flag + if (hasChatError && setChatError) { + setChatError(false); + } + return 30000; + } + + // If backend unavailable, use moderate polling + if (status === "backend-unavailable") { + return 15000; + } + + // For unhealthy/error status, use exponential backoff + const currentFailures = failureCountMap.get(failureCountKey) || 0; + failureCountMap.set(failureCountKey, currentFailures + 1); + + // Exponential backoff: 5s, 10s, 20s, then 30s + const backoffDelays = [5000, 10000, 20000, 30000]; + const delay = + backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)]; + + return delay; + }, + refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches + refetchOnMount: true, + staleTime: 30000, // Consider data stale after 30 seconds + enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete + ...options, + }, + queryClient, + ); + + return queryResult; }; diff --git a/frontend/app/chat/page.tsx b/frontend/app/chat/page.tsx index 358424f3..87ae6b60 100644 --- a/frontend/app/chat/page.tsx +++ b/frontend/app/chat/page.tsx @@ -51,6 +51,7 @@ function ChatPage() { ]); const [input, setInput] = useState(""); const { loading, setLoading } = useLoadingStore(); + const { setChatError } = useChat(); const [asyncMode, setAsyncMode] = useState(true); const [expandedFunctionCalls, setExpandedFunctionCalls] = useState< Set @@ -123,6 +124,8 @@ function ChatPage() { console.error("Streaming error:", error); setLoading(false); setWaitingTooLong(false); + // Set chat error flag to trigger test_completion=true on health checks + setChatError(true); const errorMessage: Message = { role: "assistant", content: @@ -197,6 +200,11 @@ function ChatPage() { const result = await response.json(); console.log("Upload result:", result); + if (!response.ok) { + // Set chat error flag if upload fails + setChatError(true); + } + if (response.status === 201) { // New flow: Got task ID, start tracking with centralized system const taskId = result.task_id || result.id; @@ -255,6 +263,8 @@ function ChatPage() { } } catch (error) { console.error("Upload failed:", error); + // Set chat error flag to trigger test_completion=true on health checks + setChatError(true); const errorMessage: Message = { role: "assistant", content: `❌ Failed to process document. Please try again.`, @@ -858,6 +868,8 @@ function ChatPage() { } } else { console.error("Chat failed:", result.error); + // Set chat error flag to trigger test_completion=true on health checks + setChatError(true); const errorMessage: Message = { role: "assistant", content: "Sorry, I encountered an error. Please try again.", @@ -867,6 +879,8 @@ function ChatPage() { } } catch (error) { console.error("Chat error:", error); + // Set chat error flag to trigger test_completion=true on health checks + setChatError(true); const errorMessage: Message = { role: "assistant", content: diff --git a/frontend/components/provider-health-banner.tsx b/frontend/components/provider-health-banner.tsx index f83713eb..1a91a601 100644 --- a/frontend/components/provider-health-banner.tsx +++ b/frontend/components/provider-health-banner.tsx @@ -6,6 +6,7 @@ import { useProviderHealthQuery } from "@/app/api/queries/useProviderHealthQuery import type { ModelProvider } from "@/app/settings/_helpers/model-helpers"; import { Banner, BannerIcon, BannerTitle } from "@/components/ui/banner"; import { cn } from "@/lib/utils"; +import { useChat } from "@/contexts/chat-context"; import { Button } from "./ui/button"; interface ProviderHealthBannerProps { @@ -14,13 +15,16 @@ interface ProviderHealthBannerProps { // Custom hook to check provider health status export function useProviderHealth() { + const { hasChatError } = useChat(); const { data: health, isLoading, isFetching, error, isError, - } = useProviderHealthQuery(); + } = useProviderHealthQuery({ + test_completion: hasChatError, // Use test_completion=true when chat errors occur + }); const isHealthy = health?.status === "healthy" && !isError; // Only consider unhealthy if backend is up but provider validation failed diff --git a/frontend/contexts/chat-context.tsx b/frontend/contexts/chat-context.tsx index bee05b98..46c8a2f8 100644 --- a/frontend/contexts/chat-context.tsx +++ b/frontend/contexts/chat-context.tsx @@ -79,6 +79,8 @@ interface ChatContextType { conversationFilter: KnowledgeFilter | null; // responseId: undefined = use currentConversationId, null = don't save to localStorage setConversationFilter: (filter: KnowledgeFilter | null, responseId?: string | null) => void; + hasChatError: boolean; + setChatError: (hasError: boolean) => void; } const ChatContext = createContext(undefined); @@ -108,6 +110,7 @@ export function ChatProvider({ children }: ChatProviderProps) { const [conversationLoaded, setConversationLoaded] = useState(false); const [conversationFilter, setConversationFilterState] = useState(null); + const [hasChatError, setChatError] = useState(false); // Debounce refresh requests to prevent excessive reloads const refreshTimeoutRef = useRef(null); @@ -358,6 +361,8 @@ export function ChatProvider({ children }: ChatProviderProps) { setConversationLoaded, conversationFilter, setConversationFilter, + hasChatError, + setChatError, }), [ endpoint, @@ -378,6 +383,7 @@ export function ChatProvider({ children }: ChatProviderProps) { conversationLoaded, conversationFilter, setConversationFilter, + hasChatError, ], );