From cc5711bb5e19626db41d1eb698f1ddc7c47f8afa Mon Sep 17 00:00:00 2001
From: Lucas Oliveira <lucas.edu.oli@hotmail.com>
Date: Wed, 3 Dec 2025 17:12:06 -0300
Subject: [PATCH] Enable completion test in provider health check after a chat failure

---
 .../app/api/queries/useProviderHealthQuery.ts | 235 ++++++++++--------
 frontend/app/chat/page.tsx                    |  14 ++
 .../components/provider-health-banner.tsx     |   6 +-
 frontend/contexts/chat-context.tsx            |   6 +
 4 files changed, 151 insertions(+), 110 deletions(-)

diff --git a/frontend/app/api/queries/useProviderHealthQuery.ts b/frontend/app/api/queries/useProviderHealthQuery.ts
index d0f73335..24980467 100644
--- a/frontend/app/api/queries/useProviderHealthQuery.ts
+++ b/frontend/app/api/queries/useProviderHealthQuery.ts
@@ -1,136 +1,153 @@
 import {
-  type UseQueryOptions,
-  useQuery,
-  useQueryClient,
+	type UseQueryOptions,
+	useQuery,
+	useQueryClient,
 } from "@tanstack/react-query";
+import { useChat } from "@/contexts/chat-context";
 import { useGetSettingsQuery } from "./useGetSettingsQuery";
 
 export interface ProviderHealthDetails {
-  llm_model: string;
-  embedding_model: string;
-  endpoint?: string | null;
+	llm_model: string;
+	embedding_model: string;
+	endpoint?: string | null;
 }
 
 export interface ProviderHealthResponse {
-  status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
-  message: string;
-  provider?: string;
-  llm_provider?: string;
-  embedding_provider?: string;
-  llm_error?: string | null;
-  embedding_error?: string | null;
-  details?: ProviderHealthDetails;
+	status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
+	message: string;
+	provider?: string;
+	llm_provider?: string;
+	embedding_provider?: string;
+	llm_error?: string | null;
+	embedding_error?: string | null;
+	details?: ProviderHealthDetails;
 }
 
 export interface ProviderHealthParams {
-  provider?: "openai" | "ollama" | "watsonx";
+	provider?: "openai" | "ollama" | "watsonx";
+	test_completion?: boolean;
 }
 
 // Track consecutive failures for exponential backoff
 const failureCountMap = new Map<string, number>();
 
 export const useProviderHealthQuery = (
-  params?: ProviderHealthParams,
-  options?: Omit<
-    UseQueryOptions<ProviderHealthResponse, Error>,
-    "queryKey" | "queryFn"
-  >,
+	params?: ProviderHealthParams,
+	options?: Omit<
+		UseQueryOptions<ProviderHealthResponse, Error>,
+		"queryKey" | "queryFn"
+	>,
 ) => {
-  const queryClient = useQueryClient();
+	const queryClient = useQueryClient();
 
-  const { data: settings = {} } = useGetSettingsQuery();
+	// Get chat error state from context (ChatProvider wraps the entire app in layout.tsx)
+	const { hasChatError, setChatError } = useChat();
 
-  async function checkProviderHealth(): Promise<ProviderHealthResponse> {
-    try {
-      const url = new URL("/api/provider/health", window.location.origin);
+	const { data: settings = {} } = useGetSettingsQuery();
 
-      // Add provider query param if specified
-      if (params?.provider) {
-        url.searchParams.set("provider", params.provider);
-      }
+	async function checkProviderHealth(): Promise<ProviderHealthResponse> {
+		try {
+			const url = new URL("/api/provider/health", window.location.origin);
 
-      const response = await fetch(url.toString());
+			// Add provider query param if specified
+			if (params?.provider) {
+				url.searchParams.set("provider", params.provider);
+			}
 
-      if (response.ok) {
-        return await response.json();
-      } else if (response.status === 503) {
-        // Backend is up but provider validation failed
-        const errorData = await response.json().catch(() => ({}));
-        return {
-          status: "unhealthy",
-          message: errorData.message || "Provider validation failed",
-          provider: errorData.provider || params?.provider || "unknown",
-          llm_provider: errorData.llm_provider,
-          embedding_provider: errorData.embedding_provider,
-          llm_error: errorData.llm_error,
-          embedding_error: errorData.embedding_error,
-          details: errorData.details,
-        };
-      } else {
-        // Other backend errors (400, etc.) - treat as provider issues
-        const errorData = await response.json().catch(() => ({}));
-        return {
-          status: "error",
-          message: errorData.message || "Failed to check provider health",
-          provider: errorData.provider || params?.provider || "unknown",
-          llm_provider: errorData.llm_provider,
-          embedding_provider: errorData.embedding_provider,
-          llm_error: errorData.llm_error,
-          embedding_error: errorData.embedding_error,
-          details: errorData.details,
-        };
-      }
-    } catch (error) {
-      // Network error - backend is likely down, don't show provider banner
-      return {
-        status: "backend-unavailable",
-        message: error instanceof Error ? error.message : "Connection failed",
-        provider: params?.provider || "unknown",
-      };
-    }
-  }
+			// Add test_completion query param if specified or if chat error exists
+			const testCompletion = params?.test_completion ?? hasChatError;
+			if (testCompletion) {
+				url.searchParams.set("test_completion", "true");
+			}
 
-  const queryKey = ["provider", "health"];
-  const failureCountKey = queryKey.join("-");
+			const response = await fetch(url.toString());
 
-  const queryResult = useQuery(
-    {
-      queryKey,
-      queryFn: checkProviderHealth,
-      retry: false, // Don't retry health checks automatically
-      refetchInterval: (query) => {
-        const data = query.state.data;
-        const status = data?.status;
-        
-        // If healthy, reset failure count and check every 30 seconds
-        if (status === "healthy") {
-          failureCountMap.set(failureCountKey, 0);
-          return 30000;
-        }
-        
-        // If backend unavailable, use moderate polling
-        if (status === "backend-unavailable") {
-          return 15000;
-        }
-        
-        // For unhealthy/error status, use exponential backoff
-        const currentFailures = failureCountMap.get(failureCountKey) || 0;
-        failureCountMap.set(failureCountKey, currentFailures + 1);
-        
-        // Exponential backoff: 5s, 10s, 20s, then 30s
-        const backoffDelays = [5000, 10000, 20000, 30000];
-        const delay = backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
-        
-        return delay;
-      },
-      refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
-      refetchOnMount: true,
-      staleTime: 30000, // Consider data stale after 30 seconds
-      enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
-      ...options,
-    },
-    queryClient,
-  );
+			if (response.ok) {
+				return await response.json();
+			} else if (response.status === 503) {
+				// Backend is up but provider validation failed
+				const errorData = await response.json().catch(() => ({}));
+				return {
+					status: "unhealthy",
+					message: errorData.message || "Provider validation failed",
+					provider: errorData.provider || params?.provider || "unknown",
+					llm_provider: errorData.llm_provider,
+					embedding_provider: errorData.embedding_provider,
+					llm_error: errorData.llm_error,
+					embedding_error: errorData.embedding_error,
+					details: errorData.details,
+				};
+			} else {
+				// Other backend errors (400, etc.) - treat as provider issues
+				const errorData = await response.json().catch(() => ({}));
+				return {
+					status: "error",
+					message: errorData.message || "Failed to check provider health",
+					provider: errorData.provider || params?.provider || "unknown",
+					llm_provider: errorData.llm_provider,
+					embedding_provider: errorData.embedding_provider,
+					llm_error: errorData.llm_error,
+					embedding_error: errorData.embedding_error,
+					details: errorData.details,
+				};
+			}
+		} catch (error) {
+			// Network error - backend is likely down, don't show provider banner
+			return {
+				status: "backend-unavailable",
+				message: error instanceof Error ? error.message : "Connection failed",
+				provider: params?.provider || "unknown",
+			};
+		}
+	}
 
-  return queryResult;
+	// Key on the effective completion flag so plain and completion checks are cached separately
+	const completionCheck = params?.test_completion ?? hasChatError;
+	const queryKey = ["provider", "health", completionCheck];
+	const failureCountKey = queryKey.join("-");
+
+	const queryResult = useQuery(
+		{
+			queryKey,
+			queryFn: checkProviderHealth,
+			retry: false, // Don't retry health checks automatically
+			refetchInterval: (query) => {
+				const status = query.state.data?.status;
+
+				// If healthy, reset failure count and check every 30 seconds
+				if (status === "healthy") {
+					failureCountMap.set(failureCountKey, 0);
+					// Only a passing completion check proves chat recovered, so gate the flag reset on it
+					if (completionCheck && hasChatError) {
+						setChatError(false);
+					}
+					return 30000;
+				}
+
+				// If backend unavailable, use moderate polling
+				if (status === "backend-unavailable") {
+					return 15000;
+				}
+
+				// For unhealthy/error status, use exponential backoff
+				const currentFailures = failureCountMap.get(failureCountKey) || 0;
+				failureCountMap.set(failureCountKey, currentFailures + 1);
+
+				// Exponential backoff: 5s, 10s, 20s, then 30s
+				const backoffDelays = [5000, 10000, 20000, 30000];
+				const delay =
+					backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
+
+				return delay;
+			},
+			refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
+			refetchOnMount: true,
+			staleTime: 30000, // Consider data stale after 30 seconds
+			...options,
+			enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete; set after ...options so callers cannot bypass it
+		},
+		queryClient,
+	);
+
+	return queryResult;
 };
diff --git a/frontend/app/chat/page.tsx b/frontend/app/chat/page.tsx
index 358424f3..87ae6b60 100644
--- a/frontend/app/chat/page.tsx
+++ b/frontend/app/chat/page.tsx
@@ -51,6 +51,7 @@ function ChatPage() {
   ]);
   const [input, setInput] = useState("");
   const { loading, setLoading } = useLoadingStore();
+  const { setChatError } = useChat();
   const [asyncMode, setAsyncMode] = useState(true);
   const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<
     Set<string>
@@ -123,6 +124,8 @@ function ChatPage() {
       console.error("Streaming error:", error);
       setLoading(false);
       setWaitingTooLong(false);
+      // Set chat error flag to trigger test_completion=true on health checks
+      setChatError(true);
       const errorMessage: Message = {
         role: "assistant",
         content:
@@ -197,6 +200,11 @@ function ChatPage() {
       const result = await response.json();
       console.log("Upload result:", result);
 
+      if (!response.ok) {
+        // Set chat error flag if upload fails
+        setChatError(true);
+      }
+
       if (response.status === 201) {
         // New flow: Got task ID, start tracking with centralized system
         const taskId = result.task_id || result.id;
@@ -255,6 +263,8 @@ function ChatPage() {
       }
     } catch (error) {
       console.error("Upload failed:", error);
+      // Set chat error flag to trigger test_completion=true on health checks
+      setChatError(true);
       const errorMessage: Message = {
         role: "assistant",
         content: `❌ Failed to process document. Please try again.`,
@@ -858,6 +868,8 @@ function ChatPage() {
           }
         } else {
           console.error("Chat failed:", result.error);
+          // Set chat error flag to trigger test_completion=true on health checks
+          setChatError(true);
           const errorMessage: Message = {
             role: "assistant",
             content: "Sorry, I encountered an error. Please try again.",
@@ -867,6 +879,8 @@ function ChatPage() {
         }
       } catch (error) {
         console.error("Chat error:", error);
+        // Set chat error flag to trigger test_completion=true on health checks
+        setChatError(true);
         const errorMessage: Message = {
           role: "assistant",
           content:
diff --git a/frontend/components/provider-health-banner.tsx b/frontend/components/provider-health-banner.tsx
index f83713eb..1a91a601 100644
--- a/frontend/components/provider-health-banner.tsx
+++ b/frontend/components/provider-health-banner.tsx
@@ -6,6 +6,7 @@ import { useProviderHealthQuery } from "@/app/api/queries/useProviderHealthQuery
 import type { ModelProvider } from "@/app/settings/_helpers/model-helpers";
 import { Banner, BannerIcon, BannerTitle } from "@/components/ui/banner";
 import { cn } from "@/lib/utils";
+import { useChat } from "@/contexts/chat-context";
 import { Button } from "./ui/button";
 
 interface ProviderHealthBannerProps {
@@ -14,13 +15,16 @@ interface ProviderHealthBannerProps {
 
 // Custom hook to check provider health status
 export function useProviderHealth() {
+  const { hasChatError } = useChat();
   const {
     data: health,
     isLoading,
     isFetching,
     error,
     isError,
-  } = useProviderHealthQuery();
+  } = useProviderHealthQuery({
+    test_completion: hasChatError, // Use test_completion=true when chat errors occur
+  });
 
   const isHealthy = health?.status === "healthy" && !isError;
   // Only consider unhealthy if backend is up but provider validation failed
diff --git a/frontend/contexts/chat-context.tsx b/frontend/contexts/chat-context.tsx
index bee05b98..46c8a2f8 100644
--- a/frontend/contexts/chat-context.tsx
+++ b/frontend/contexts/chat-context.tsx
@@ -79,6 +79,8 @@ interface ChatContextType {
   conversationFilter: KnowledgeFilter | null;
   // responseId: undefined = use currentConversationId, null = don't save to localStorage
   setConversationFilter: (filter: KnowledgeFilter | null, responseId?: string | null) => void;
+  hasChatError: boolean;
+  setChatError: (hasError: boolean) => void;
 }
 
 const ChatContext = createContext<ChatContextType | undefined>(undefined);
@@ -108,6 +110,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
   const [conversationLoaded, setConversationLoaded] = useState(false);
   const [conversationFilter, setConversationFilterState] =
     useState<KnowledgeFilter | null>(null);
+  const [hasChatError, setChatError] = useState(false);
 
   // Debounce refresh requests to prevent excessive reloads
   const refreshTimeoutRef = useRef<NodeJS.Timeout | null>(null);
@@ -358,6 +361,8 @@ export function ChatProvider({ children }: ChatProviderProps) {
       setConversationLoaded,
       conversationFilter,
       setConversationFilter,
+      hasChatError,
+      setChatError,
     }),
     [
       endpoint,
@@ -378,6 +383,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
       conversationLoaded,
       conversationFilter,
       setConversationFilter,
+      hasChatError,
     ],
   );