Make the health check enable the completion check when chat has failed

2025-12-03 17:12:06 -03:00 · 2025-12-03 17:12:06 -03:00 · cc5711bb5e
commit cc5711bb5e
parent 9b08f1fcee
4 changed files with 151 additions and 110 deletions
--- a/frontend/app/api/queries/useProviderHealthQuery.ts
+++ b/frontend/app/api/queries/useProviderHealthQuery.ts
@@ -1,136 +1,153 @@
 import {
-  type UseQueryOptions,
-  useQuery,
-  useQueryClient,
+	type UseQueryOptions,
+	useQuery,
+	useQueryClient,
 } from "@tanstack/react-query";
+import { useChat } from "@/contexts/chat-context";
 import { useGetSettingsQuery } from "./useGetSettingsQuery";

 export interface ProviderHealthDetails {
-  llm_model: string;
-  embedding_model: string;
-  endpoint?: string | null;
+	llm_model: string;
+	embedding_model: string;
+	endpoint?: string | null;
 }

 export interface ProviderHealthResponse {
-  status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
-  message: string;
-  provider?: string;
-  llm_provider?: string;
-  embedding_provider?: string;
-  llm_error?: string | null;
-  embedding_error?: string | null;
-  details?: ProviderHealthDetails;
+	status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
+	message: string;
+	provider?: string;
+	llm_provider?: string;
+	embedding_provider?: string;
+	llm_error?: string | null;
+	embedding_error?: string | null;
+	details?: ProviderHealthDetails;
 }

 export interface ProviderHealthParams {
-  provider?: "openai" | "ollama" | "watsonx";
+	provider?: "openai" | "ollama" | "watsonx";
+	test_completion?: boolean;
 }

 // Track consecutive failures for exponential backoff
 const failureCountMap = new Map<string, number>();

 export const useProviderHealthQuery = (
-  params?: ProviderHealthParams,
-  options?: Omit<
-    UseQueryOptions<ProviderHealthResponse, Error>,
-    "queryKey" | "queryFn"
-  >,
+	params?: ProviderHealthParams,
+	options?: Omit<
+		UseQueryOptions<ProviderHealthResponse, Error>,
+		"queryKey" | "queryFn"
+	>,
 ) => {
-  const queryClient = useQueryClient();
+	const queryClient = useQueryClient();

-  const { data: settings = {} } = useGetSettingsQuery();
+	// Get chat error state from context (ChatProvider wraps the entire app in layout.tsx)
+	const { hasChatError, setChatError } = useChat();

-  async function checkProviderHealth(): Promise<ProviderHealthResponse> {
-    try {
-      const url = new URL("/api/provider/health", window.location.origin);
+	const { data: settings = {} } = useGetSettingsQuery();

-      // Add provider query param if specified
-      if (params?.provider) {
-        url.searchParams.set("provider", params.provider);
-      }
+	async function checkProviderHealth(): Promise<ProviderHealthResponse> {
+		try {
+			const url = new URL("/api/provider/health", window.location.origin);

-      const response = await fetch(url.toString());
+			// Add provider query param if specified
+			if (params?.provider) {
+				url.searchParams.set("provider", params.provider);
+			}

-      if (response.ok) {
-        return await response.json();
-      } else if (response.status === 503) {
-        // Backend is up but provider validation failed
-        const errorData = await response.json().catch(() => ({}));
-        return {
-          status: "unhealthy",
-          message: errorData.message || "Provider validation failed",
-          provider: errorData.provider || params?.provider || "unknown",
-          llm_provider: errorData.llm_provider,
-          embedding_provider: errorData.embedding_provider,
-          llm_error: errorData.llm_error,
-          embedding_error: errorData.embedding_error,
-          details: errorData.details,
-        };
-      } else {
-        // Other backend errors (400, etc.) - treat as provider issues
-        const errorData = await response.json().catch(() => ({}));
-        return {
-          status: "error",
-          message: errorData.message || "Failed to check provider health",
-          provider: errorData.provider || params?.provider || "unknown",
-          llm_provider: errorData.llm_provider,
-          embedding_provider: errorData.embedding_provider,
-          llm_error: errorData.llm_error,
-          embedding_error: errorData.embedding_error,
-          details: errorData.details,
-        };
-      }
-    } catch (error) {
-      // Network error - backend is likely down, don't show provider banner
-      return {
-        status: "backend-unavailable",
-        message: error instanceof Error ? error.message : "Connection failed",
-        provider: params?.provider || "unknown",
-      };
-    }
-  }
+			// Add test_completion query param if specified or if chat error exists
+			const testCompletion = params?.test_completion ?? hasChatError;
+			if (testCompletion) {
+				url.searchParams.set("test_completion", "true");
+			}

-  const queryKey = ["provider", "health"];
-  const failureCountKey = queryKey.join("-");
+			const response = await fetch(url.toString());

-  const queryResult = useQuery(
-    {
-      queryKey,
-      queryFn: checkProviderHealth,
-      retry: false, // Don't retry health checks automatically
-      refetchInterval: (query) => {
-        const data = query.state.data;
-        const status = data?.status;
-        
-        // If healthy, reset failure count and check every 30 seconds
-        if (status === "healthy") {
-          failureCountMap.set(failureCountKey, 0);
-          return 30000;
-        }
-        
-        // If backend unavailable, use moderate polling
-        if (status === "backend-unavailable") {
-          return 15000;
-        }
-        
-        // For unhealthy/error status, use exponential backoff
-        const currentFailures = failureCountMap.get(failureCountKey) || 0;
-        failureCountMap.set(failureCountKey, currentFailures + 1);
-        
-        // Exponential backoff: 5s, 10s, 20s, then 30s
-        const backoffDelays = [5000, 10000, 20000, 30000];
-        const delay = backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
-        
-        return delay;
-      },
-      refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
-      refetchOnMount: true,
-      staleTime: 30000, // Consider data stale after 30 seconds
-      enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
-      ...options,
-    },
-    queryClient,
-  );
+			if (response.ok) {
+				return await response.json();
+			} else if (response.status === 503) {
+				// Backend is up but provider validation failed
+				const errorData = await response.json().catch(() => ({}));
+				return {
+					status: "unhealthy",
+					message: errorData.message || "Provider validation failed",
+					provider: errorData.provider || params?.provider || "unknown",
+					llm_provider: errorData.llm_provider,
+					embedding_provider: errorData.embedding_provider,
+					llm_error: errorData.llm_error,
+					embedding_error: errorData.embedding_error,
+					details: errorData.details,
+				};
+			} else {
+				// Other backend errors (400, etc.) - treat as provider issues
+				const errorData = await response.json().catch(() => ({}));
+				return {
+					status: "error",
+					message: errorData.message || "Failed to check provider health",
+					provider: errorData.provider || params?.provider || "unknown",
+					llm_provider: errorData.llm_provider,
+					embedding_provider: errorData.embedding_provider,
+					llm_error: errorData.llm_error,
+					embedding_error: errorData.embedding_error,
+					details: errorData.details,
+				};
+			}
+		} catch (error) {
+			// Network error - backend is likely down, don't show provider banner
+			return {
+				status: "backend-unavailable",
+				message: error instanceof Error ? error.message : "Connection failed",
+				provider: params?.provider || "unknown",
+			};
+		}
+	}

-  return queryResult;
+	const queryKey = ["provider", "health", params?.test_completion];
+	const failureCountKey = queryKey.join("-");
+
+	const queryResult = useQuery(
+		{
+			queryKey,
+			queryFn: checkProviderHealth,
+			retry: false, // Don't retry health checks automatically
+			refetchInterval: (query) => {
+				const data = query.state.data;
+				const status = data?.status;
+
+				// If healthy, reset failure count and check every 30 seconds
+				// Also reset chat error flag if we're using test_completion=true and it succeeded
+				if (status === "healthy") {
+					failureCountMap.set(failureCountKey, 0);
+					// If we were checking with test_completion=true due to chat errors, reset the flag
+					if (hasChatError && setChatError) {
+						setChatError(false);
+					}
+					return 30000;
+				}
+
+				// If backend unavailable, use moderate polling
+				if (status === "backend-unavailable") {
+					return 15000;
+				}
+
+				// For unhealthy/error status, use exponential backoff
+				const currentFailures = failureCountMap.get(failureCountKey) || 0;
+				failureCountMap.set(failureCountKey, currentFailures + 1);
+
+				// Exponential backoff: 5s, 10s, 20s, then 30s
+				const backoffDelays = [5000, 10000, 20000, 30000];
+				const delay =
+					backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
+
+				return delay;
+			},
+			refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
+			refetchOnMount: true,
+			staleTime: 30000, // Consider data stale after 30 seconds
+			enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
+			...options,
+		},
+		queryClient,
+	);
+
+	return queryResult;
 };
--- a/frontend/app/chat/page.tsx
+++ b/frontend/app/chat/page.tsx
@@ -51,6 +51,7 @@ function ChatPage() {
  ]);
  const [input, setInput] = useState("");
  const { loading, setLoading } = useLoadingStore();
+  const { setChatError } = useChat();
  const [asyncMode, setAsyncMode] = useState(true);
  const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<
    Set<string>
@@ -123,6 +124,8 @@ function ChatPage() {
      console.error("Streaming error:", error);
      setLoading(false);
      setWaitingTooLong(false);
+      // Set chat error flag to trigger test_completion=true on health checks
+      setChatError(true);
      const errorMessage: Message = {
        role: "assistant",
        content:
@@ -197,6 +200,11 @@ function ChatPage() {
      const result = await response.json();
      console.log("Upload result:", result);

+      if (!response.ok) {
+        // Set chat error flag if upload fails
+        setChatError(true);
+      }
+
      if (response.status === 201) {
        // New flow: Got task ID, start tracking with centralized system
        const taskId = result.task_id || result.id;
@@ -255,6 +263,8 @@ function ChatPage() {
      }
    } catch (error) {
      console.error("Upload failed:", error);
+      // Set chat error flag to trigger test_completion=true on health checks
+      setChatError(true);
      const errorMessage: Message = {
        role: "assistant",
        content: `❌ Failed to process document. Please try again.`,
@@ -858,6 +868,8 @@ function ChatPage() {
          }
        } else {
          console.error("Chat failed:", result.error);
+          // Set chat error flag to trigger test_completion=true on health checks
+          setChatError(true);
          const errorMessage: Message = {
            role: "assistant",
            content: "Sorry, I encountered an error. Please try again.",
@@ -867,6 +879,8 @@ function ChatPage() {
        }
      } catch (error) {
        console.error("Chat error:", error);
+        // Set chat error flag to trigger test_completion=true on health checks
+        setChatError(true);
        const errorMessage: Message = {
          role: "assistant",
          content:
--- a/frontend/components/provider-health-banner.tsx
+++ b/frontend/components/provider-health-banner.tsx
@@ -6,6 +6,7 @@ import { useProviderHealthQuery } from "@/app/api/queries/useProviderHealthQuery
 import type { ModelProvider } from "@/app/settings/_helpers/model-helpers";
 import { Banner, BannerIcon, BannerTitle } from "@/components/ui/banner";
 import { cn } from "@/lib/utils";
+import { useChat } from "@/contexts/chat-context";
 import { Button } from "./ui/button";

 interface ProviderHealthBannerProps {
@@ -14,13 +15,16 @@ interface ProviderHealthBannerProps {

 // Custom hook to check provider health status
 export function useProviderHealth() {
+  const { hasChatError } = useChat();
  const {
    data: health,
    isLoading,
    isFetching,
    error,
    isError,
-  } = useProviderHealthQuery();
+  } = useProviderHealthQuery({
+    test_completion: hasChatError, // Use test_completion=true when chat errors occur
+  });

  const isHealthy = health?.status === "healthy" && !isError;
  // Only consider unhealthy if backend is up but provider validation failed
--- a/frontend/contexts/chat-context.tsx
+++ b/frontend/contexts/chat-context.tsx
@@ -79,6 +79,8 @@ interface ChatContextType {
  conversationFilter: KnowledgeFilter | null;
  // responseId: undefined = use currentConversationId, null = don't save to localStorage
  setConversationFilter: (filter: KnowledgeFilter | null, responseId?: string | null) => void;
+  hasChatError: boolean;
+  setChatError: (hasError: boolean) => void;
 }

 const ChatContext = createContext<ChatContextType | undefined>(undefined);
@@ -108,6 +110,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
  const [conversationLoaded, setConversationLoaded] = useState(false);
  const [conversationFilter, setConversationFilterState] =
    useState<KnowledgeFilter | null>(null);
+  const [hasChatError, setChatError] = useState(false);

  // Debounce refresh requests to prevent excessive reloads
  const refreshTimeoutRef = useRef<NodeJS.Timeout | null>(null);
@@ -358,6 +361,8 @@ export function ChatProvider({ children }: ChatProviderProps) {
      setConversationLoaded,
      conversationFilter,
      setConversationFilter,
+      hasChatError,
+      setChatError,
    }),
    [
      endpoint,
@@ -378,6 +383,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
      conversationLoaded,
      conversationFilter,
      setConversationFilter,
+      hasChatError,
    ],
  );