Make the health check enable the completion test when a chat request has failed

This commit is contained in:
Lucas Oliveira 2025-12-03 17:12:06 -03:00
parent 9b08f1fcee
commit cc5711bb5e
4 changed files with 151 additions and 110 deletions

View file

@ -1,136 +1,153 @@
import {
type UseQueryOptions,
useQuery,
useQueryClient,
type UseQueryOptions,
useQuery,
useQueryClient,
} from "@tanstack/react-query";
import { useChat } from "@/contexts/chat-context";
import { useGetSettingsQuery } from "./useGetSettingsQuery";
/**
 * Model configuration details optionally attached to a provider health response.
 */
export interface ProviderHealthDetails {
  /** Name of the LLM model reported by the health endpoint. */
  llm_model: string;
  /** Name of the embedding model reported by the health endpoint. */
  embedding_model: string;
  /** Provider endpoint, when one is reported; may be absent or null. */
  endpoint?: string | null;
}
/**
 * Result of a provider health check as consumed by the UI.
 */
export interface ProviderHealthResponse {
  /**
   * Outcome of the check:
   * - "healthy": the endpoint answered with a successful response.
   * - "unhealthy": the endpoint answered 503 (provider validation failed).
   * - "error": the endpoint answered with any other non-OK status.
   * - "backend-unavailable": the request itself failed (e.g. network error).
   */
  status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
  /** Human-readable description of the outcome. */
  message: string;
  /** Provider the check refers to, when known. */
  provider?: string;
  /** Provider used for the LLM, when reported. */
  llm_provider?: string;
  /** Provider used for embeddings, when reported. */
  embedding_provider?: string;
  /** Error reported for the LLM side, if any. */
  llm_error?: string | null;
  /** Error reported for the embedding side, if any. */
  embedding_error?: string | null;
  /** Model and endpoint details, when reported. */
  details?: ProviderHealthDetails;
}
/**
 * Optional parameters for the provider health check request.
 */
export interface ProviderHealthParams {
  /** Restrict the check to one provider; sent as the `provider` query param. */
  provider?: "openai" | "ollama" | "watsonx";
  /** When true, the request is sent with `test_completion=true`. */
  test_completion?: boolean;
}
// Track consecutive failures for exponential backoff.
// Module-level, so the counters are shared by every hook instance; entries are
// keyed by the query key joined with "-", reset to 0 on a healthy result and
// incremented on unhealthy/error results.
const failureCountMap = new Map<string, number>();
/**
 * React Query hook that polls `/api/provider/health` and exposes the result.
 *
 * The request carries `test_completion=true` when `params.test_completion` is
 * true, or, when that param is left undefined, whenever the chat context
 * reports a chat error. A healthy result clears the chat error flag.
 *
 * Polling cadence: 30s while healthy, 15s while the backend is unreachable,
 * and 5s / 10s / 20s / 30s backoff while the provider is unhealthy or errored.
 *
 * @param params - Optional provider filter and completion-test toggle.
 * @param options - Extra React Query options (`queryKey` and `queryFn` are
 *   fixed by the hook). Passing `enabled: false` disables the query; the query
 *   never runs before `settings.edited` is truthy, whatever `options` contains.
 * @returns The `useQuery` result for the health response.
 */
export const useProviderHealthQuery = (
  params?: ProviderHealthParams,
  options?: Omit<
    UseQueryOptions<ProviderHealthResponse, Error>,
    "queryKey" | "queryFn"
  >,
) => {
  const queryClient = useQueryClient();

  const { data: settings = {} } = useGetSettingsQuery();

  // Get chat error state from context (ChatProvider wraps the entire app in layout.tsx)
  const { hasChatError, setChatError } = useChat();

  /**
   * Fetches the health endpoint and maps every outcome (including network
   * failures) to a ProviderHealthResponse; it never rejects.
   */
  async function checkProviderHealth(): Promise<ProviderHealthResponse> {
    try {
      const url = new URL("/api/provider/health", window.location.origin);

      // Add provider query param if specified
      if (params?.provider) {
        url.searchParams.set("provider", params.provider);
      }

      // Add test_completion query param if specified or if chat error exists
      const testCompletion = params?.test_completion ?? hasChatError;
      if (testCompletion) {
        url.searchParams.set("test_completion", "true");
      }

      const response = await fetch(url.toString());

      if (response.ok) {
        return await response.json();
      } else if (response.status === 503) {
        // Backend is up but provider validation failed
        const errorData = await response.json().catch(() => ({}));
        return {
          status: "unhealthy",
          message: errorData.message || "Provider validation failed",
          provider: errorData.provider || params?.provider || "unknown",
          llm_provider: errorData.llm_provider,
          embedding_provider: errorData.embedding_provider,
          llm_error: errorData.llm_error,
          embedding_error: errorData.embedding_error,
          details: errorData.details,
        };
      } else {
        // Other backend errors (400, etc.) - treat as provider issues
        const errorData = await response.json().catch(() => ({}));
        return {
          status: "error",
          message: errorData.message || "Failed to check provider health",
          provider: errorData.provider || params?.provider || "unknown",
          llm_provider: errorData.llm_provider,
          embedding_provider: errorData.embedding_provider,
          llm_error: errorData.llm_error,
          embedding_error: errorData.embedding_error,
          details: errorData.details,
        };
      }
    } catch (error) {
      // Network error - backend is likely down, don't show provider banner
      return {
        status: "backend-unavailable",
        message: error instanceof Error ? error.message : "Connection failed",
        provider: params?.provider || "unknown",
      };
    }
  }

  const queryKey = ["provider", "health", params?.test_completion];
  const failureCountKey = queryKey.join("-");

  const queryResult = useQuery(
    {
      queryKey,
      queryFn: checkProviderHealth,
      retry: false, // Don't retry health checks automatically
      // NOTE(review): this callback has side effects (failure counter, chat
      // error flag) and React Query may invoke it more than once per fetch —
      // confirm that is acceptable.
      refetchInterval: (query) => {
        const data = query.state.data;
        const status = data?.status;

        // If healthy, reset failure count and check every 30 seconds
        // Also reset chat error flag if we're using test_completion=true and it succeeded
        if (status === "healthy") {
          failureCountMap.set(failureCountKey, 0);
          // If we were checking with test_completion=true due to chat errors, reset the flag
          if (hasChatError && setChatError) {
            setChatError(false);
          }
          return 30000;
        }

        // If backend unavailable, use moderate polling
        if (status === "backend-unavailable") {
          return 15000;
        }

        // For unhealthy/error status, use exponential backoff
        const currentFailures = failureCountMap.get(failureCountKey) ?? 0;
        failureCountMap.set(failureCountKey, currentFailures + 1);

        // Exponential backoff: 5s, 10s, 20s, then 30s
        const backoffDelays = [5000, 10000, 20000, 30000];
        const delay =
          backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
        return delay;
      },
      refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
      refetchOnMount: true,
      staleTime: 30000, // Consider data stale after 30 seconds
      ...options,
      // Only run after onboarding is complete. Set after the spread so that
      // `options.enabled` cannot override the onboarding gate.
      enabled: !!settings?.edited && options?.enabled !== false,
    },
    queryClient,
  );

  return queryResult;
};

View file

@ -51,6 +51,7 @@ function ChatPage() {
]);
const [input, setInput] = useState("");
const { loading, setLoading } = useLoadingStore();
const { setChatError } = useChat();
const [asyncMode, setAsyncMode] = useState(true);
const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<
Set<string>
@ -123,6 +124,8 @@ function ChatPage() {
console.error("Streaming error:", error);
setLoading(false);
setWaitingTooLong(false);
// Set chat error flag to trigger test_completion=true on health checks
setChatError(true);
const errorMessage: Message = {
role: "assistant",
content:
@ -197,6 +200,11 @@ function ChatPage() {
const result = await response.json();
console.log("Upload result:", result);
if (!response.ok) {
// Set chat error flag if upload fails
setChatError(true);
}
if (response.status === 201) {
// New flow: Got task ID, start tracking with centralized system
const taskId = result.task_id || result.id;
@ -255,6 +263,8 @@ function ChatPage() {
}
} catch (error) {
console.error("Upload failed:", error);
// Set chat error flag to trigger test_completion=true on health checks
setChatError(true);
const errorMessage: Message = {
role: "assistant",
content: `❌ Failed to process document. Please try again.`,
@ -858,6 +868,8 @@ function ChatPage() {
}
} else {
console.error("Chat failed:", result.error);
// Set chat error flag to trigger test_completion=true on health checks
setChatError(true);
const errorMessage: Message = {
role: "assistant",
content: "Sorry, I encountered an error. Please try again.",
@ -867,6 +879,8 @@ function ChatPage() {
}
} catch (error) {
console.error("Chat error:", error);
// Set chat error flag to trigger test_completion=true on health checks
setChatError(true);
const errorMessage: Message = {
role: "assistant",
content:

View file

@ -6,6 +6,7 @@ import { useProviderHealthQuery } from "@/app/api/queries/useProviderHealthQuery
import type { ModelProvider } from "@/app/settings/_helpers/model-helpers";
import { Banner, BannerIcon, BannerTitle } from "@/components/ui/banner";
import { cn } from "@/lib/utils";
import { useChat } from "@/contexts/chat-context";
import { Button } from "./ui/button";
interface ProviderHealthBannerProps {
@ -14,13 +15,16 @@ interface ProviderHealthBannerProps {
// Custom hook to check provider health status
export function useProviderHealth() {
const { hasChatError } = useChat();
const {
data: health,
isLoading,
isFetching,
error,
isError,
} = useProviderHealthQuery();
} = useProviderHealthQuery({
test_completion: hasChatError, // Use test_completion=true when chat errors occur
});
const isHealthy = health?.status === "healthy" && !isError;
// Only consider unhealthy if backend is up but provider validation failed

View file

@ -79,6 +79,8 @@ interface ChatContextType {
conversationFilter: KnowledgeFilter | null;
// responseId: undefined = use currentConversationId, null = don't save to localStorage
setConversationFilter: (filter: KnowledgeFilter | null, responseId?: string | null) => void;
hasChatError: boolean;
setChatError: (hasError: boolean) => void;
}
const ChatContext = createContext<ChatContextType | undefined>(undefined);
@ -108,6 +110,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
const [conversationLoaded, setConversationLoaded] = useState(false);
const [conversationFilter, setConversationFilterState] =
useState<KnowledgeFilter | null>(null);
const [hasChatError, setChatError] = useState(false);
// Debounce refresh requests to prevent excessive reloads
const refreshTimeoutRef = useRef<NodeJS.Timeout | null>(null);
@ -358,6 +361,8 @@ export function ChatProvider({ children }: ChatProviderProps) {
setConversationLoaded,
conversationFilter,
setConversationFilter,
hasChatError,
setChatError,
}),
[
endpoint,
@ -378,6 +383,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
conversationLoaded,
conversationFilter,
setConversationFilter,
hasChatError,
],
);