Make the health check enable the completion check when chat has failed

2025-12-03 17:12:06 -03:00 · 2025-12-03 17:12:06 -03:00 · cc5711bb5e
commit cc5711bb5e
parent 9b08f1fcee
4 changed files with 151 additions and 110 deletions
--- a/frontend/app/api/queries/useProviderHealthQuery.ts
+++ b/frontend/app/api/queries/useProviderHealthQuery.ts
@ -1,136 +1,153 @@
 import {
-  type UseQueryOptions,
+	type UseQueryOptions,
-  useQuery,
+	useQuery,
-  useQueryClient,
+	useQueryClient,
 } from "@tanstack/react-query";
 import { useChat } from "@/contexts/chat-context";
 import { useGetSettingsQuery } from "./useGetSettingsQuery";
 export interface ProviderHealthDetails {
-  llm_model: string;
+	llm_model: string;
-  embedding_model: string;
+	embedding_model: string;
-  endpoint?: string | null;
+	endpoint?: string | null;
 }
 export interface ProviderHealthResponse {
-  status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
+	status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
-  message: string;
+	message: string;
-  provider?: string;
+	provider?: string;
-  llm_provider?: string;
+	llm_provider?: string;
-  embedding_provider?: string;
+	embedding_provider?: string;
-  llm_error?: string | null;
+	llm_error?: string | null;
-  embedding_error?: string | null;
+	embedding_error?: string | null;
-  details?: ProviderHealthDetails;
+	details?: ProviderHealthDetails;
 }
 export interface ProviderHealthParams {
-  provider?: "openai" | "ollama" | "watsonx";
+	provider?: "openai" | "ollama" | "watsonx";
 	test_completion?: boolean;
 }
 // Track consecutive failures for exponential backoff
 const failureCountMap = new Map<string, number>();
 export const useProviderHealthQuery = (
-  params?: ProviderHealthParams,
+	params?: ProviderHealthParams,
-  options?: Omit<
+	options?: Omit<
-    UseQueryOptions<ProviderHealthResponse, Error>,
+		UseQueryOptions<ProviderHealthResponse, Error>,
-    "queryKey" | "queryFn"
+		"queryKey" | "queryFn"
-  >,
+	>,
 ) => {
-  const queryClient = useQueryClient();
+	const queryClient = useQueryClient();
-  const { data: settings = {} } = useGetSettingsQuery();
+	// Get chat error state from context (ChatProvider wraps the entire app in layout.tsx)
 	const { hasChatError, setChatError } = useChat();
-  async function checkProviderHealth(): Promise<ProviderHealthResponse> {
+	const { data: settings = {} } = useGetSettingsQuery();
    try {
      const url = new URL("/api/provider/health", window.location.origin);
-      // Add provider query param if specified
+	async function checkProviderHealth(): Promise<ProviderHealthResponse> {
-      if (params?.provider) {
+		try {
-        url.searchParams.set("provider", params.provider);
+			const url = new URL("/api/provider/health", window.location.origin);
      }
-      const response = await fetch(url.toString());
+			// Add provider query param if specified
 			if (params?.provider) {
 				url.searchParams.set("provider", params.provider);
 			}
-      if (response.ok) {
+			// Add test_completion query param if specified or if chat error exists
-        return await response.json();
+			const testCompletion = params?.test_completion ?? hasChatError;
-      } else if (response.status === 503) {
+			if (testCompletion) {
-        // Backend is up but provider validation failed
+				url.searchParams.set("test_completion", "true");
-        const errorData = await response.json().catch(() => ({}));
+			}
        return {
          status: "unhealthy",
          message: errorData.message || "Provider validation failed",
          provider: errorData.provider || params?.provider || "unknown",
          llm_provider: errorData.llm_provider,
          embedding_provider: errorData.embedding_provider,
          llm_error: errorData.llm_error,
          embedding_error: errorData.embedding_error,
          details: errorData.details,
        };
      } else {
        // Other backend errors (400, etc.) - treat as provider issues
        const errorData = await response.json().catch(() => ({}));
        return {
          status: "error",
          message: errorData.message || "Failed to check provider health",
          provider: errorData.provider || params?.provider || "unknown",
          llm_provider: errorData.llm_provider,
          embedding_provider: errorData.embedding_provider,
          llm_error: errorData.llm_error,
          embedding_error: errorData.embedding_error,
          details: errorData.details,
        };
      }
    } catch (error) {
      // Network error - backend is likely down, don't show provider banner
      return {
        status: "backend-unavailable",
        message: error instanceof Error ? error.message : "Connection failed",
        provider: params?.provider || "unknown",
      };
    }
  }
-  const queryKey = ["provider", "health"];
+			const response = await fetch(url.toString());
  const failureCountKey = queryKey.join("-");
-  const queryResult = useQuery(
+			if (response.ok) {
-    {
+				return await response.json();
-      queryKey,
+			} else if (response.status === 503) {
-      queryFn: checkProviderHealth,
+				// Backend is up but provider validation failed
-      retry: false, // Don't retry health checks automatically
+				const errorData = await response.json().catch(() => ({}));
-      refetchInterval: (query) => {
+				return {
-        const data = query.state.data;
+					status: "unhealthy",
-        const status = data?.status;
+					message: errorData.message || "Provider validation failed",
-        
+					provider: errorData.provider || params?.provider || "unknown",
-        // If healthy, reset failure count and check every 30 seconds
+					llm_provider: errorData.llm_provider,
-        if (status === "healthy") {
+					embedding_provider: errorData.embedding_provider,
-          failureCountMap.set(failureCountKey, 0);
+					llm_error: errorData.llm_error,
-          return 30000;
+					embedding_error: errorData.embedding_error,
-        }
+					details: errorData.details,
-        
+				};
-        // If backend unavailable, use moderate polling
+			} else {
-        if (status === "backend-unavailable") {
+				// Other backend errors (400, etc.) - treat as provider issues
-          return 15000;
+				const errorData = await response.json().catch(() => ({}));
-        }
+				return {
-        
+					status: "error",
-        // For unhealthy/error status, use exponential backoff
+					message: errorData.message || "Failed to check provider health",
-        const currentFailures = failureCountMap.get(failureCountKey) || 0;
+					provider: errorData.provider || params?.provider || "unknown",
-        failureCountMap.set(failureCountKey, currentFailures + 1);
+					llm_provider: errorData.llm_provider,
-        
+					embedding_provider: errorData.embedding_provider,
-        // Exponential backoff: 5s, 10s, 20s, then 30s
+					llm_error: errorData.llm_error,
-        const backoffDelays = [5000, 10000, 20000, 30000];
+					embedding_error: errorData.embedding_error,
-        const delay = backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
+					details: errorData.details,
-        
+				};
-        return delay;
+			}
-      },
+		} catch (error) {
-      refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
+			// Network error - backend is likely down, don't show provider banner
-      refetchOnMount: true,
+			return {
-      staleTime: 30000, // Consider data stale after 30 seconds
+				status: "backend-unavailable",
-      enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
+				message: error instanceof Error ? error.message : "Connection failed",
-      ...options,
+				provider: params?.provider || "unknown",
-    },
+			};
-    queryClient,
+		}
-  );
+	}
-  return queryResult;
+	const queryKey = ["provider", "health", params?.test_completion];
 	const failureCountKey = queryKey.join("-");
 	const queryResult = useQuery(
 		{
 			queryKey,
 			queryFn: checkProviderHealth,
 			retry: false, // Don't retry health checks automatically
 			refetchInterval: (query) => {
 				const data = query.state.data;
 				const status = data?.status;
 				// If healthy, reset failure count and check every 30 seconds
 				// Also reset chat error flag if we're using test_completion=true and it succeeded
 				if (status === "healthy") {
 					failureCountMap.set(failureCountKey, 0);
 					// If we were checking with test_completion=true due to chat errors, reset the flag
 					if (hasChatError && setChatError) {
 						setChatError(false);
 					}
 					return 30000;
 				}
 				// If backend unavailable, use moderate polling
 				if (status === "backend-unavailable") {
 					return 15000;
 				}
 				// For unhealthy/error status, use exponential backoff
 				const currentFailures = failureCountMap.get(failureCountKey) || 0;
 				failureCountMap.set(failureCountKey, currentFailures + 1);
 				// Exponential backoff: 5s, 10s, 20s, then 30s
 				const backoffDelays = [5000, 10000, 20000, 30000];
 				const delay =
 					backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
 				return delay;
 			},
 			refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
 			refetchOnMount: true,
 			staleTime: 30000, // Consider data stale after 30 seconds
 			enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
 			...options,
 		},
 		queryClient,
 	);
 	return queryResult;
 };
--- a/frontend/app/chat/page.tsx
+++ b/frontend/app/chat/page.tsx
@ -51,6 +51,7 @@ function ChatPage() {
  ]);
  const [input, setInput] = useState("");
  const { loading, setLoading } = useLoadingStore();
  const { setChatError } = useChat();
  const [asyncMode, setAsyncMode] = useState(true);
  const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<
    Set<string>
@ -123,6 +124,8 @@ function ChatPage() {
      console.error("Streaming error:", error);
      setLoading(false);
      setWaitingTooLong(false);
      // Set chat error flag to trigger test_completion=true on health checks
      setChatError(true);
      const errorMessage: Message = {
        role: "assistant",
        content:
@ -197,6 +200,11 @@ function ChatPage() {
      const result = await response.json();
      console.log("Upload result:", result);
      if (!response.ok) {
        // Set chat error flag if upload fails
        setChatError(true);
      }
      if (response.status === 201) {
        // New flow: Got task ID, start tracking with centralized system
        const taskId = result.task_id || result.id;
@ -255,6 +263,8 @@ function ChatPage() {
      }
    } catch (error) {
      console.error("Upload failed:", error);
      // Set chat error flag to trigger test_completion=true on health checks
      setChatError(true);
      const errorMessage: Message = {
        role: "assistant",
        content: `❌ Failed to process document. Please try again.`,
@ -858,6 +868,8 @@ function ChatPage() {
          }
        } else {
          console.error("Chat failed:", result.error);
          // Set chat error flag to trigger test_completion=true on health checks
          setChatError(true);
          const errorMessage: Message = {
            role: "assistant",
            content: "Sorry, I encountered an error. Please try again.",
@ -867,6 +879,8 @@ function ChatPage() {
        }
      } catch (error) {
        console.error("Chat error:", error);
        // Set chat error flag to trigger test_completion=true on health checks
        setChatError(true);
        const errorMessage: Message = {
          role: "assistant",
          content:
--- a/frontend/components/provider-health-banner.tsx
+++ b/frontend/components/provider-health-banner.tsx
@ -6,6 +6,7 @@ import { useProviderHealthQuery } from "@/app/api/queries/useProviderHealthQuery
 import type { ModelProvider } from "@/app/settings/_helpers/model-helpers";
 import { Banner, BannerIcon, BannerTitle } from "@/components/ui/banner";
 import { cn } from "@/lib/utils";
 import { useChat } from "@/contexts/chat-context";
 import { Button } from "./ui/button";
 interface ProviderHealthBannerProps {
@ -14,13 +15,16 @@ interface ProviderHealthBannerProps {
 // Custom hook to check provider health status
 export function useProviderHealth() {
  const { hasChatError } = useChat();
  const {
    data: health,
    isLoading,
    isFetching,
    error,
    isError,
-  } = useProviderHealthQuery();
+  } = useProviderHealthQuery({
    test_completion: hasChatError, // Use test_completion=true when chat errors occur
  });
  const isHealthy = health?.status === "healthy" && !isError;
  // Only consider unhealthy if backend is up but provider validation failed
--- a/frontend/contexts/chat-context.tsx
+++ b/frontend/contexts/chat-context.tsx
@ -79,6 +79,8 @@ interface ChatContextType {
  conversationFilter: KnowledgeFilter | null;
  // responseId: undefined = use currentConversationId, null = don't save to localStorage
  setConversationFilter: (filter: KnowledgeFilter | null, responseId?: string | null) => void;
  hasChatError: boolean;
  setChatError: (hasError: boolean) => void;
 }
 const ChatContext = createContext<ChatContextType | undefined>(undefined);
@ -108,6 +110,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
  const [conversationLoaded, setConversationLoaded] = useState(false);
  const [conversationFilter, setConversationFilterState] =
    useState<KnowledgeFilter | null>(null);
  const [hasChatError, setChatError] = useState(false);
  // Debounce refresh requests to prevent excessive reloads
  const refreshTimeoutRef = useRef<NodeJS.Timeout | null>(null);
@ -358,6 +361,8 @@ export function ChatProvider({ children }: ChatProviderProps) {
      setConversationLoaded,
      conversationFilter,
      setConversationFilter,
      hasChatError,
      setChatError,
    }),
    [
      endpoint,
@ -378,6 +383,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
      conversationLoaded,
      conversationFilter,
      setConversationFilter,
      hasChatError,
    ],
  );